/// <summary>
/// Creates a spell checker that reads terms from the index behind the given searcher.
/// </summary>
/// <param name="searchProvider">
/// Lucene searcher whose underlying searcher must be an <see cref="IndexSearcher"/>;
/// its index reader is stored for later use.
/// </param>
public UmbracoSpellChecker(BaseLuceneSearcher searchProvider)
{
    // Direct cast: a searcher of any other type fails here with an InvalidCastException.
    _indexReader = ((IndexSearcher)searchProvider.GetSearcher()).GetIndexReader();

    // The spell-check dictionary is held in an in-memory directory and uses
    // Jaro-Winkler string distance to rank candidates.
    _checker = new SpellChecker.Net.Search.Spell.SpellChecker(
        new RAMDirectory(),
        new JaroWinklerDistance());
}
/// <summary>
/// Creates a <see cref="MultiIndexSearcher"/> initialized with the name of an existing
/// searcher and the supplied index-set configuration.
/// </summary>
/// <param name="searcher">Searcher whose <c>Name</c> is reused for the new multi-index searcher.</param>
/// <param name="entryIndexSets">Configuration values passed to <c>Initialize</c>.</param>
/// <returns>The initialized multi-index searcher.</returns>
internal BaseLuceneSearcher InitMultiIndexSearcher(BaseLuceneSearcher searcher, NameValueCollection entryIndexSets)
{
    MultiIndexSearcher multiSearcher = new MultiIndexSearcher();
    multiSearcher.Initialize(searcher.Name, entryIndexSets);

    return multiSearcher;
}
/// <summary>
/// Creates the alternate-spelling tool for the index behind the given searcher.
/// </summary>
/// <param name="SearchProvider">
/// Lucene searcher to keep a reference to; its underlying searcher must be an
/// <see cref="IndexSearcher"/>, from which the index reader is taken.
/// </param>
public AlternateSpellingTool(BaseLuceneSearcher SearchProvider)
{
    _searchProvider = SearchProvider;

    // Direct cast: a searcher of any other type fails here with an InvalidCastException.
    indexReader = ((IndexSearcher)SearchProvider.GetSearcher()).GetIndexReader();

    // In-memory spell-check dictionary using Jaro-Winkler string distance.
    _luceneChecker = new SpellChecker(
        new RAMDirectory(),
        new JaroWinklerDistance());
}
/// <summary>
/// Creates the site search service.
/// </summary>
/// <param name="searcher">Searcher to use directly, or to take the name from when index sets are supplied.</param>
/// <param name="entryIndexSets">
/// Optional index-set configuration; when not <c>null</c> a multi-index searcher is created
/// from it and used instead of <paramref name="searcher"/>.
/// </param>
/// <param name="configHelper">Optional settings helper; a new <see cref="AppSettingsHelper"/> is used when <c>null</c>.</param>
/// <param name="umbracoTree">Optional tree traverser; a new <see cref="UmbracoTreeTraverser"/> is used when <c>null</c>.</param>
public SiteSearchService(BaseLuceneSearcher searcher, NameValueCollection entryIndexSets = null, IAppSettingsHelper configHelper = null, IUmbracoTreeTraverser umbracoTree = null)
{
    if (entryIndexSets != null)
    {
        _searcher = InitMultiIndexSearcher(searcher, entryIndexSets);
    }
    else
    {
        _searcher = searcher;
    }

    // Fall back to the default implementations for any collaborator that was not supplied.
    if (configHelper == null)
    {
        configHelper = new AppSettingsHelper();
    }
    _configHelper = configHelper;

    if (umbracoTree == null)
    {
        umbracoTree = new UmbracoTreeTraverser();
    }
    _umbracoTree = umbracoTree;
}
/// <summary>
/// Gets the index reader of the <see cref="IndexSearcher"/> that backs the given searcher.
/// </summary>
/// <param name="searcher">Searcher to obtain the index reader from.</param>
/// <returns>The index reader of the underlying <see cref="IndexSearcher"/>.</returns>
/// <exception cref="InvalidOperationException">
/// The underlying searcher is not an <see cref="IndexSearcher"/>.
/// </exception>
private static IndexReader GetIndexReaderForSearcher(this BaseLuceneSearcher searcher)
{
    var luceneSearcher = searcher.GetSearcher() as IndexSearcher;
    if (luceneSearcher != null)
    {
        return luceneSearcher.GetIndexReader();
    }

    throw new InvalidOperationException("The index searcher is not of type " + typeof(IndexSearcher) + " cannot execute this method");
}
/// <summary>
/// Collects the filters of a NOT group as MUST_NOT clauses of a Lucene boolean query.
/// </summary>
/// <param name="searcher">Searcher whose indexing analyzer is used to parse filters that have no native Lucene query.</param>
/// <param name="luceneSearchCriteria">Search criteria the filters belong to.</param>
/// <remarks>
/// NOTE(review): in the code visible here the assembled boolean query is never attached to
/// <paramref name="luceneSearchCriteria"/> - confirm whether that is intentional.
/// </remarks>
public void ProcessLuceneExcludeFilters(BaseLuceneSearcher searcher, LuceneSearchCriteria luceneSearchCriteria)
{
    var parser = new QueryParser(Lucene.Net.Util.Version.LUCENE_29, "", searcher.IndexingAnalyzer);
    var exclusions = new BooleanQuery();

    foreach (var filter in Filters)
    {
        // Only a group whose boolean operation is NOT contributes exclusions.
        if (_booleanOperation == BooleanOperation.Not)
        {
            // Prefer the filter's native Lucene query; otherwise parse its string form.
            var clause = filter.GetLuceneQuery() ?? parser.Parse(filter.ToString());
            exclusions.Add(clause, BooleanClause.Occur.MUST_NOT);
        }
    }
}
/// <summary>
/// Generates the search criteria that we want to search on, built from the current
/// NodeTypeAlias, MajorDocsVersion, Term, Filters and OrderBy values.
/// </summary>
/// <param name="searcher">Searcher used to create the criteria; also handed to every filter.</param>
/// <returns>
/// The populated criteria, or <c>null</c> when there is no node type alias, no term and no filters.
/// </returns>
/// <remarks>
/// The cleaned-up search term is written back to <c>Term</c>.
/// </remarks>
private ISearchCriteria GetSearchCriteria(BaseLuceneSearcher searcher)
{
    var criteria = (LuceneSearchCriteria)searcher.CreateSearchCriteria();

    // Check if there's anything to process
    if (NodeTypeAlias.IsNullOrWhiteSpace() && Term.IsNullOrWhiteSpace() && !Filters.Any())
    {
        return null;
    }

    var sb = new StringBuilder();

    if (string.IsNullOrEmpty(NodeTypeAlias) == false)
    {
        sb.AppendFormat("+nodeTypeAlias: {0} ", NodeTypeAlias);
    }

    // Version filtering:
    // * a docs version (MajorDocsVersion) is supplied: filter by it
    // * no docs version is supplied: filter by the configured current major version
    // * searching all versions at once is currently not implemented
    var currentMajorVersions = new[] { "6", "7", "8", "9" };

    string versionToFilterBy = MajorDocsVersion == null
        ? ConfigurationManager.AppSettings[Constants.AppSettings.DocumentationCurrentMajorVersion]
        : MajorDocsVersion.ToString();

    if (NodeTypeAlias.InvariantEquals("documentation"))
    {
        // For documentation the wanted major version is required explicitly
        foreach (var versionToFind in currentMajorVersions.Where(f => f == versionToFilterBy))
        {
            sb.AppendFormat("+majorVersion:{0} ", versionToFind);
        }
    }
    else
    {
        // For everything else we filter by this version by excluding the other major versions
        foreach (var versionToNegate in currentMajorVersions.Where(f => f != versionToFilterBy))
        {
            sb.AppendFormat("-majorVersion:{0} ", versionToNegate);
        }
    }

    if (!string.IsNullOrEmpty(Term))
    {
        sb.Append("+(");

        // Cleanup the term so there are no errors
        Term = Term
            .Replace("\"", string.Empty)
            .Replace(":", string.Empty)
            .Replace("\\", string.Empty)
            .Trim('*');

        // Remove OR operators (matched case-insensitively)
        Term = Regex.Replace(Term, @" OR ", " ", RegexOptions.IgnoreCase);

        // Replace double whitespaces with single space as they were giving errors
        Term = Regex.Replace(Term, @"\s{2,}", " ");

        // Do an exact phrase match with boost
        sb.AppendFormat("nodeName:\"{0}\"^20000 body:\"{0}\"^5000 ", Term);

        // Search the YAML fields
        sb.AppendFormat("tags:\"{0}\"^20000 ", Term);
        sb.AppendFormat("keywords:\"{0}\"^20000 ", Term);

        // Now we need to split the phrase into individual terms so the query parser can understand
        var split = Term.Split(new[] { " " }, StringSplitOptions.RemoveEmptyEntries);

        // De-duplicate the terms, remove the stop words and don't include single chars
        var deduped = new List<string>();
        foreach (var s in split)
        {
            if (s.Length <= 1 || deduped.Contains(s, StringComparer.InvariantCultureIgnoreCase))
            {
                continue;
            }

            // Lower-case before the stop-word lookup so that e.g. "The" is treated like "the";
            // the de-duplication above is case-insensitive as well.
            if (StandardAnalyzer.STOP_WORDS_SET.Contains(s.ToLowerInvariant()))
            {
                continue;
            }

            deduped.Add(s);
        }

        if (deduped.Count > 20)
        {
            // Truncate, we don't want to search on all of these individually
            deduped = deduped.Take(20).ToList();
        }

        // Do standard match with boost on each term
        foreach (var s in deduped)
        {
            sb.AppendFormat("nodeName:{0}^10000 body:{0}^50 ", s);
        }

        // Do suffix with wildcards
        foreach (var s in deduped)
        {
            sb.AppendFormat("nodeName:{0}*^1000 body:{0}* ", s);
        }

        // Do fuzzy (close match 0.9)
        foreach (var s in deduped)
        {
            sb.AppendFormat("nodeName:{0}~0.9^0.1 body:{0}~0.9^0.1 ", s);
        }

        sb.Append(")");
    }

    // Only apply a raw query when something was actually constructed above
    if (sb.Length > 0)
    {
        criteria = (LuceneSearchCriteria)criteria.RawQuery(sb.ToString());
    }

    // Now we can apply any filters, this is done by using native Lucene query objects
    if (Filters.Any())
    {
        // If there is a filter applied to the entire result then add it here, this is a MUST sub query
        foreach (var filter in Filters)
        {
            filter.ProcessLuceneAddFilters(searcher, criteria);
        }

        // Process the excludes after the includes: Lucene can only exclude from what has been included
        foreach (var filter in Filters)
        {
            filter.ProcessLuceneExcludeFilters(searcher, criteria);
        }
    }

    if (string.IsNullOrEmpty(OrderBy) == false)
    {
        criteria.OrderByDescending(OrderBy);
    }

    return criteria;
}
/// <summary>
/// Builds an ISearchCriteria that searches every field of a <see cref="BaseLuceneSearcher"/>
/// for the words of the given text.
/// </summary>
/// <param name="searchText">Text to search for; it is split on spaces into individual words.</param>
/// <param name="useWildcards">When <c>true</c>, each word is searched as a multiple-character wildcard value.</param>
/// <param name="searcher">Searcher that creates the criteria and supplies the field list.</param>
/// <returns>The compiled search criteria.</returns>
/// <remarks>
/// This lives here because the field list is obtained from Examine's
/// <c>GetSearchFields</c> method through reflection.
/// </remarks>
private ISearchCriteria SearchAllFields(string searchText, bool useWildcards, BaseLuceneSearcher searcher)
{
    var criteria = searcher.CreateSearchCriteria();

    // Look up BaseLuceneSearcher.GetSearchFields once and cache the MethodInfo.
    if (_examineGetSearchFields == null)
    {
        const BindingFlags anyMember =
            BindingFlags.Instance | BindingFlags.Public | BindingFlags.NonPublic | BindingFlags.Static;

        _examineGetSearchFields = typeof(BaseLuceneSearcher).GetMethod("GetSearchFields", anyMember);
    }

    // Invoke searcher.GetSearchFields() through reflection
    var fields = (IEnumerable<string>)_examineGetSearchFields.Invoke(searcher, null);

    // Same approach Examine uses internally: one grouped OR across all fields
    var words = searchText.Split(new[] { ' ' }, StringSplitOptions.RemoveEmptyEntries);

    if (!useWildcards)
    {
        return criteria.GroupedOr(fields, words).Compile();
    }

    var wildcardValues = words
        .Select(x => new CustomExamineValue(Examineness.ComplexWildcard, x.MultipleCharacterWildcard().Value))
        .ToArray<IExamineValue>();

    return criteria.GroupedOr(fields, wildcardValues).Compile();
}
/// <summary>
/// Builds the search criteria from the current NodeTypeAlias, Term, Filters and OrderBy values.
/// </summary>
/// <param name="searcher">Searcher used to create the criteria; also handed to every filter.</param>
/// <returns>
/// The populated criteria, or <c>null</c> when there is no node type alias, no term and no filters.
/// </returns>
/// <remarks>
/// The cleaned-up search term is written back to <c>Term</c>.
/// </remarks>
public ISearchCriteria GetSearchCriteria(BaseLuceneSearcher searcher)
{
    var criteria = (LuceneSearchCriteria)searcher.CreateSearchCriteria();

    // Bail out when there is nothing to search on
    if (NodeTypeAlias.IsNullOrWhiteSpace() && Term.IsNullOrWhiteSpace() && !Filters.Any())
    {
        return null;
    }

    if (!string.IsNullOrEmpty(NodeTypeAlias))
    {
        criteria.Field("nodeTypeAlias", NodeTypeAlias);
    }

    var rawQuery = new StringBuilder();

    if (!string.IsNullOrEmpty(Term))
    {
        // Strip characters that would break the query parser
        Term = Term.Replace("\"", string.Empty).Replace(":", string.Empty).Replace("\\", string.Empty).Trim('*');

        // Drop OR operators, whatever their casing
        Term = Regex.Replace(Term, @" OR ", " ", RegexOptions.IgnoreCase);

        // Collapse runs of whitespace into a single space
        Term = Regex.Replace(Term, @"\s{2,}", " ");

        // The query parser needs the phrase as individual words
        var words = Term.Split(new[] { " " }, StringSplitOptions.RemoveEmptyEntries);

        if (words.Length > 1)
        {
            // Exact phrase match, boosted
            rawQuery.AppendFormat("nodeName:\"{0}\"^20000 body:\"{0}\"^5000 ", Term);
        }

        // Per-word clauses, emitted pattern by pattern:
        // boosted standard match, then suffix wildcard, then fuzzy (close match 0.9)
        var perWordPatterns = new[]
        {
            "nodeName:{0}^10000 body:{0}^50 ",
            "nodeName:{0}*^1000 body:{0}* ",
            "nodeName:{0}~0.9^0.1 body:{0}~0.9^0.1 "
        };

        foreach (var pattern in perWordPatterns)
        {
            foreach (var word in words)
            {
                rawQuery.AppendFormat(pattern, word);
            }
        }
    }

    // Only apply a raw query when something was actually constructed above
    if (rawQuery.Length > 0)
    {
        criteria = (LuceneSearchCriteria)criteria.RawQuery(rawQuery.ToString());
    }

    // Filters are applied as native Lucene query objects
    if (Filters.Any())
    {
        // Inclusions first: these are MUST sub queries
        foreach (var filter in Filters)
        {
            filter.ProcessLuceneAddFilters(searcher, criteria);
        }

        // Exclusions afterwards: Lucene can only exclude from what has been included
        foreach (var filter in Filters)
        {
            filter.ProcessLuceneExcludeFilters(searcher, criteria);
        }
    }

    if (!string.IsNullOrEmpty(OrderBy))
    {
        criteria.OrderByDescending(OrderBy);
    }

    return criteria;
}