/// <summary>
/// Helper method to create an ISearchCriteria for searching all fields in a <see cref="BaseLuceneSearcher"/>
/// </summary>
/// <param name="searchText">The text to search for; it is split on spaces and every non-empty word becomes a search value.</param>
/// <param name="useWildcards">When true, every word is searched as a multiple-character wildcard value instead of as-is.</param>
/// <param name="searcher">The searcher used to create the criteria and whose search fields are queried.</param>
/// <returns>The compiled criteria: a grouped OR of every word over every search field.</returns>
/// <exception cref="InvalidOperationException">
/// The <c>GetSearchFields</c> method could not be found on <see cref="BaseLuceneSearcher"/>.
/// </exception>
/// <remarks>
/// This is here because some of this stuff is internal in Examine
/// </remarks>
private ISearchCriteria SearchAllFields(string searchText, bool useWildcards, BaseLuceneSearcher searcher)
{
    var sc = searcher.CreateSearchCriteria();

    if (_examineGetSearchFields == null)
    {
        // Look up the GetSearchFields method on BaseLuceneSearcher once and cache it in the field.
        _examineGetSearchFields = typeof(BaseLuceneSearcher).GetMethod(
            "GetSearchFields",
            BindingFlags.Instance | BindingFlags.Public | BindingFlags.NonPublic | BindingFlags.Static);

        // GetMethod returns null when nothing matches; fail with a clear message here
        // rather than with a NullReferenceException at the Invoke call below.
        if (_examineGetSearchFields == null)
        {
            throw new InvalidOperationException(
                "Could not find the GetSearchFields method on " + typeof(BaseLuceneSearcher).FullName + ".");
        }
    }

    // Get the results of searcher.GetSearchFields() using ugly reflection since it's not public.
    var searchFields = (IEnumerable<string>)_examineGetSearchFields.Invoke(searcher, null);

    // This is what Examine does internally to create ISearchCriteria for searching all fields.
    var words = searchText.Split(new[] { ' ' }, StringSplitOptions.RemoveEmptyEntries);

    if (useWildcards == false)
    {
        return sc.GroupedOr(searchFields, words).Compile();
    }

    var wildcardValues = words
        .Select(x => new CustomExamineValue(Examineness.ComplexWildcard, x.MultipleCharacterWildcard().Value))
        .ToArray<IExamineValue>();

    return sc.GroupedOr(searchFields, wildcardValues).Compile();
}
/// <summary>
/// Runs a raw Lucene query built from <paramref name="searchTerm"/> and returns the hits ordered by descending score.
/// </summary>
/// <param name="searchTerm">The text to search for; leading and trailing whitespace is ignored.</param>
/// <param name="isFuzzy">
/// When true, every word is searched as a fuzzy term (<c>word~</c>), multiple words are combined with AND,
/// and the results stop at the first hit whose score is not above 0.05.
/// When false, the whole text is searched as a quoted phrase with a proximity of one (<c>"text"~1</c>).
/// </param>
/// <returns>The matching results, best score first; an empty sequence when there is nothing to search for.</returns>
public IEnumerable<SearchResult> GetRawResults(string searchTerm, bool isFuzzy)
{
    // Fuzzy results are cut off at the first hit whose score is not above this value.
    const float minimumFuzzyScore = 0.05f;

    // Nothing to search for: a blank term would otherwise produce queries such as "~".
    if (string.IsNullOrWhiteSpace(searchTerm))
    {
        return Enumerable.Empty<SearchResult>();
    }

    // NOTE(review): only whitespace is trimmed; Lucene special characters and embedded
    // quotes in the term are passed through to the raw query unescaped.
    searchTerm = searchTerm.Trim();

    var searchCriteria = _searcher.CreateSearchCriteria(BooleanOperation.Or);
    bool isPhrase = searchTerm.Contains(' ');
    string luceneRawQuery;

    if (isFuzzy && !isPhrase)
    {
        // Fuzzy single word.
        luceneRawQuery = searchTerm + '~';
    }
    else if (isFuzzy)
    {
        // Fuzzy multiple words.
        // More info about grouping https://lucene.apache.org/core/2_9_4/queryparsersyntax.html
        // Empty entries are removed so that adjacent separators ("hello, world") do not
        // produce a bare "~" term between two ANDs.
        var searchedTerms = searchTerm.Split(new[] { ' ', ',', '.' }, StringSplitOptions.RemoveEmptyEntries);
        if (searchedTerms.Length == 0)
        {
            // The term consisted of separators only.
            return Enumerable.Empty<SearchResult>();
        }

        luceneRawQuery = string.Join(" AND ", searchedTerms.Select(word => word + "~"));
    }
    else
    {
        // Not fuzzy: exact word or phrase, with one word proximity/distance.
        luceneRawQuery = "\"" + searchTerm + "\"" + "~1";
    }

    var query = searchCriteria.RawQuery(luceneRawQuery);
    var results = _searcher.Search(query).OrderByDescending(x => x.Score);

    return isFuzzy ? results.TakeWhile(x => x.Score > minimumFuzzyScore) : results;
}
/// <summary>
/// Generates the search criteria that we want to search on
/// </summary>
/// <remarks>
/// Builds one raw Lucene query from <c>NodeTypeAlias</c>, the major version to filter by and <c>Term</c>,
/// then applies <c>Filters</c> and <c>OrderBy</c>. <c>Term</c> is overwritten with its cleaned-up value.
/// </remarks>
/// <param name="searcher">The searcher used to create the criteria; it is also handed to every filter.</param>
/// <returns>The criteria to search with, or <c>null</c> when no node type alias, term or filter was supplied.</returns>
private ISearchCriteria GetSearchCriteria(BaseLuceneSearcher searcher)
{
    // Upper bound on the number of individual terms that get their own clauses.
    const int maxTerms = 20;

    var criteria = (LuceneSearchCriteria)searcher.CreateSearchCriteria();

    // Check if there's anything to process.
    if (NodeTypeAlias.IsNullOrWhiteSpace() && Term.IsNullOrWhiteSpace() && !Filters.Any())
    {
        return null;
    }

    var sb = new StringBuilder();

    // Use the same blank check as the guard above: a whitespace-only alias would otherwise
    // emit "+nodeTypeAlias:" without a value, which is not a valid query.
    if (NodeTypeAlias.IsNullOrWhiteSpace() == false)
    {
        sb.AppendFormat("+nodeTypeAlias: {0} ", NodeTypeAlias);
    }

    // Version filtering:
    // * the version to filter by is MajorDocsVersion when supplied, otherwise the configured current major version
    // * documentation searches REQUIRE that version (when it is one of the known major versions)
    // * every other search EXCLUDES the other known major versions
    // * searching all versions at once is currently not implemented
    var currentMajorVersions = new[] { "6", "7", "8", "9" };

    string versionToFilterBy = MajorDocsVersion == null
        ? ConfigurationManager.AppSettings[Constants.AppSettings.DocumentationCurrentMajorVersion]
        : MajorDocsVersion.ToString();

    if (NodeTypeAlias.InvariantEquals("documentation"))
    {
        // We filter by this version by requiring it.
        if (currentMajorVersions.Contains(versionToFilterBy))
        {
            sb.AppendFormat("+majorVersion:{0} ", versionToFilterBy);
        }
    }
    else
    {
        // We filter by this version by excluding the other major versions in lucene.
        foreach (var versionToNegate in currentMajorVersions.Where(v => v != versionToFilterBy))
        {
            sb.AppendFormat("-majorVersion:{0} ", versionToNegate);
        }
    }

    if (!string.IsNullOrEmpty(Term))
    {
        // Cleanup the term so there are no errors.
        Term = Term
            .Replace("\"", string.Empty)
            .Replace(":", string.Empty)
            .Replace("\\", string.Empty)
            .Trim('*');

        // Replace OR's with case insensitive matching.
        Term = Regex.Replace(Term, @" OR ", " ", RegexOptions.IgnoreCase);

        // Replace double whitespaces with single space as they were giving errors.
        Term = Regex.Replace(Term, @"\s{2,}", " ");

        // The cleanup can leave nothing behind (e.g. a term of "*" or ":"); in that case
        // skip the term group instead of emitting clauses for an empty phrase.
        if (Term.IsNullOrWhiteSpace() == false)
        {
            sb.Append("+(");

            // Do an exact phrase match with boost.
            sb.AppendFormat("nodeName:\"{0}\"^20000 body:\"{0}\"^5000 ", Term);

            // SEARCH YAML stuff.
            sb.AppendFormat("tags:\"{0}\"^20000 ", Term);
            sb.AppendFormat("keywords:\"{0}\"^20000 ", Term);

            // Now we need to split the phrase into individual terms so the query parser can understand.
            var split = Term.Split(new[] { " " }, StringSplitOptions.RemoveEmptyEntries);

            // De-duplicate the terms, remove the stop words and don't include single chars.
            // Collection stops at maxTerms: we don't want to search on all of them individually,
            // and it keeps the duplicate check bounded for very long input.
            var deduped = new List<string>(maxTerms);
            foreach (var s in split)
            {
                if (deduped.Count == maxTerms)
                {
                    break;
                }

                if (s.Length > 1
                    && !deduped.Contains(s, StringComparer.InvariantCultureIgnoreCase)
                    && !StandardAnalyzer.STOP_WORDS_SET.Contains(s))
                {
                    deduped.Add(s);
                }
            }

            // Do standard match with boost on each term.
            foreach (var s in deduped)
            {
                sb.AppendFormat("nodeName:{0}^10000 body:{0}^50 ", s);
            }

            // Do suffix with wildcards.
            foreach (var s in deduped)
            {
                sb.AppendFormat("nodeName:{0}*^1000 body:{0}* ", s);
            }

            // Do fuzzy (close match 0.9).
            foreach (var s in deduped)
            {
                sb.AppendFormat("nodeName:{0}~0.9^0.1 body:{0}~0.9^0.1 ", s);
            }

            sb.Append(")");
        }
    }

    // Only add a raw query when something was actually constructed above.
    if (sb.Length > 0)
    {
        criteria = (LuceneSearchCriteria)criteria.RawQuery(sb.ToString());
    }

    // Now we can apply any filters, this is done by using native Lucene query objects.
    if (Filters.Any())
    {
        // If there is a filter applied to the entire result then add it here, this is a MUST sub query.
        foreach (var filter in Filters)
        {
            filter.ProcessLuceneAddFilters(searcher, criteria);
        }

        // Need to process the excludes after - since that is how lucene works,
        // you can only exclude after you've included.
        foreach (var filter in Filters)
        {
            filter.ProcessLuceneExcludeFilters(searcher, criteria);
        }
    }

    if (string.IsNullOrEmpty(OrderBy) == false)
    {
        criteria.OrderByDescending(OrderBy);
    }

    return criteria;
}
/// <summary>
/// Builds the search criteria from the current NodeTypeAlias, Term, Filters and OrderBy values.
/// </summary>
/// <param name="searcher">The searcher that creates the criteria; it is also passed to every filter.</param>
/// <returns>The criteria to search with, or <c>null</c> when no node type alias, term or filter was supplied.</returns>
public ISearchCriteria GetSearchCriteria(BaseLuceneSearcher searcher)
{
    var criteria = (LuceneSearchCriteria)searcher.CreateSearchCriteria();

    // Without an alias, a term or a filter there is nothing to search on.
    var nothingToProcess = NodeTypeAlias.IsNullOrWhiteSpace() && Term.IsNullOrWhiteSpace() && !Filters.Any();
    if (nothingToProcess)
    {
        return null;
    }

    if (!string.IsNullOrEmpty(NodeTypeAlias))
    {
        criteria.Field("nodeTypeAlias", NodeTypeAlias);
    }

    var rawQuery = new StringBuilder();

    if (string.IsNullOrEmpty(Term) == false)
    {
        // Strip the characters that were causing query errors.
        Term = Term.Replace("\"", string.Empty)
                   .Replace(":", string.Empty)
                   .Replace("\\", string.Empty)
                   .Trim('*');

        // Drop OR keywords, whatever their casing.
        Term = Regex.Replace(Term, @" OR ", " ", RegexOptions.IgnoreCase);

        // Collapse runs of whitespace into a single space.
        Term = Regex.Replace(Term, @"\s{2,}", " ");

        // The query parser needs the phrase as individual words.
        var words = Term.Split(new[] { " " }, StringSplitOptions.RemoveEmptyEntries);

        // More than one word: boost an exact match on the whole phrase.
        if (words.Length > 1)
        {
            rawQuery.AppendFormat("nodeName:\"{0}\"^20000 body:\"{0}\"^5000 ", Term);
        }

        // One pass over all words per clause kind, in this order:
        // standard match with boost, suffix wildcard, fuzzy (close match 0.9).
        var clauseFormats = new[]
        {
            "nodeName:{0}^10000 body:{0}^50 ",
            "nodeName:{0}*^1000 body:{0}* ",
            "nodeName:{0}~0.9^0.1 body:{0}~0.9^0.1 "
        };

        foreach (var clauseFormat in clauseFormats)
        {
            foreach (var word in words)
            {
                rawQuery.AppendFormat(clauseFormat, word);
            }
        }
    }

    // Only add a raw query when the term produced one.
    if (rawQuery.Length > 0)
    {
        criteria = (LuceneSearchCriteria)criteria.RawQuery(rawQuery.ToString());
    }

    // Filters are applied through native Lucene query objects.
    if (Filters.Any())
    {
        // Inclusions first: a filter on the entire result is a MUST sub query.
        foreach (var inclusion in Filters)
        {
            inclusion.ProcessLuceneAddFilters(searcher, criteria);
        }

        // Exclusions afterwards, because lucene can only exclude after something was included.
        foreach (var exclusion in Filters)
        {
            exclusion.ProcessLuceneExcludeFilters(searcher, criteria);
        }
    }

    if (!string.IsNullOrEmpty(OrderBy))
    {
        criteria.OrderByDescending(OrderBy);
    }

    return criteria;
}