/// <summary>
        /// Creates a spell checker that reads from the index behind <paramref name="searchProvider"/>.
        /// </summary>
        /// <param name="searchProvider">
        /// Provider whose underlying searcher is cast to <see cref="IndexSearcher"/>; the cast
        /// throws if the provider exposes a different searcher type.
        /// </param>
        public UmbracoSpellChecker(BaseLuceneSearcher searchProvider)
        {
            // Grab the reader straight from the provider's Lucene searcher.
            _indexReader = ((IndexSearcher)searchProvider.GetSearcher()).GetIndexReader();

            // The checker uses a fresh RAMDirectory and Jaro-Winkler string distance.
            var spellIndex = new RAMDirectory();
            var distance   = new JaroWinklerDistance();
            _checker = new SpellChecker.Net.Search.Spell.SpellChecker(spellIndex, distance);
        }
        /// <summary>
        /// Creates a <see cref="MultiIndexSearcher"/> and initializes it with the name of
        /// <paramref name="searcher"/> and the supplied index set configuration.
        /// </summary>
        /// <param name="searcher">Searcher whose <c>Name</c> is given to the new multi-index searcher.</param>
        /// <param name="entryIndexSets">Configuration values passed to <c>Initialize</c>.</param>
        /// <returns>The initialized multi-index searcher.</returns>
        internal BaseLuceneSearcher InitMultiIndexSearcher(BaseLuceneSearcher searcher, NameValueCollection entryIndexSets)
        {
            var multiSearcher = new MultiIndexSearcher();
            multiSearcher.Initialize(searcher.Name, entryIndexSets);
            return multiSearcher;
        }
        /// <summary>
        /// Creates the tool for the given search provider: stores the provider, captures the
        /// index reader of its underlying searcher and prepares the Lucene spell checker.
        /// </summary>
        /// <param name="SearchProvider">
        /// Provider whose underlying searcher is cast to <see cref="IndexSearcher"/>; the cast
        /// throws if the provider exposes a different searcher type.
        /// </param>
        public AlternateSpellingTool(BaseLuceneSearcher SearchProvider)
        {
            _searchProvider = SearchProvider;

            // Read the index through the provider's Lucene searcher.
            var luceneSearcher = (IndexSearcher)SearchProvider.GetSearcher();
            indexReader = luceneSearcher.GetIndexReader();

            // The checker uses a fresh RAMDirectory and Jaro-Winkler string distance.
            var spellIndex = new RAMDirectory();
            _luceneChecker = new SpellChecker(spellIndex, new JaroWinklerDistance());
        }
        /// <summary>
        /// Creates the search service.
        /// </summary>
        /// <param name="searcher">Searcher to use, or the source of the name for a multi-index searcher.</param>
        /// <param name="entryIndexSets">
        /// When not <c>null</c>, a multi-index searcher is created from these settings via
        /// <c>InitMultiIndexSearcher</c> and used instead of <paramref name="searcher"/>.
        /// </param>
        /// <param name="configHelper">Settings helper; a new <see cref="AppSettingsHelper"/> is used when <c>null</c>.</param>
        /// <param name="umbracoTree">Tree traverser; a new <see cref="UmbracoTreeTraverser"/> is used when <c>null</c>.</param>
        public SiteSearchService(BaseLuceneSearcher searcher, NameValueCollection entryIndexSets = null,
                                 IAppSettingsHelper configHelper = null, IUmbracoTreeTraverser umbracoTree = null)
        {
            if (entryIndexSets == null)
            {
                // No index sets supplied: use the searcher exactly as given.
                _searcher = searcher;
            }
            else
            {
                _searcher = InitMultiIndexSearcher(searcher, entryIndexSets);
            }

            // Fall back to the default implementations for optional collaborators.
            _configHelper = configHelper ?? new AppSettingsHelper();
            _umbracoTree  = umbracoTree ?? new UmbracoTreeTraverser();
        }
        /// <summary>
        /// Returns the index reader of the Lucene searcher behind <paramref name="searcher"/>.
        /// </summary>
        /// <param name="searcher">Searcher whose underlying Lucene searcher must be an <see cref="IndexSearcher"/>.</param>
        /// <returns>The reader obtained from the underlying <see cref="IndexSearcher"/>.</returns>
        /// <exception cref="InvalidOperationException">
        /// The underlying searcher is not an <see cref="IndexSearcher"/> (or is <c>null</c>).
        /// </exception>
        private static IndexReader GetIndexReaderForSearcher(this BaseLuceneSearcher searcher)
        {
            var luceneSearcher = searcher.GetSearcher();
            var indexSearcher  = luceneSearcher as IndexSearcher;

            if (indexSearcher != null)
            {
                return indexSearcher.GetIndexReader();
            }

            throw new InvalidOperationException("The index searcher is not of type " + typeof(IndexSearcher) + " cannot execute this method");
        }
Beispiel #6
0
        /// <summary>
        /// Collects every filter as a MUST_NOT clause when this group's boolean operation is
        /// <c>BooleanOperation.Not</c>; for any other operation no clause is built.
        /// </summary>
        /// <param name="searcher">Supplies the analyzer used to parse filters that have no native Lucene query.</param>
        /// <param name="luceneSearchCriteria">Criteria the exclusions are meant for.</param>
        /// <remarks>
        /// NOTE(review): within this method the collected query is never read again and
        /// <paramref name="luceneSearchCriteria"/> is never touched, so the exclusions appear
        /// to have no effect on the criteria - confirm whether attaching them was intended.
        /// </remarks>
        public void ProcessLuceneExcludeFilters(BaseLuceneSearcher searcher, LuceneSearchCriteria luceneSearchCriteria)
        {
            var parser     = new QueryParser(Lucene.Net.Util.Version.LUCENE_29, "", searcher.IndexingAnalyzer);
            var exclusions = new BooleanQuery();

            foreach (var filter in Filters)
            {
                if (_booleanOperation == BooleanOperation.Not)
                {
                    // Prefer the filter's native Lucene query; otherwise parse its string form.
                    var clauseQuery = filter.GetLuceneQuery() ?? parser.Parse(filter.ToString());
                    exclusions.Add(clauseQuery, BooleanClause.Occur.MUST_NOT);
                }
            }
        }
Beispiel #7
0
        /// <summary>
        /// Builds the Lucene search criteria from the current <c>NodeTypeAlias</c>, <c>Term</c>,
        /// <c>MajorDocsVersion</c>, <c>Filters</c> and <c>OrderBy</c> values.
        /// </summary>
        /// <param name="searcher">Searcher that creates the criteria and is handed to every filter.</param>
        /// <returns>
        /// The populated criteria, or <c>null</c> when there is no node type alias, no term and no filter.
        /// </returns>
        /// <remarks>
        /// <c>Term</c> is sanitised in place, so the property holds the cleaned-up value afterwards.
        /// </remarks>
        private ISearchCriteria GetSearchCriteria(BaseLuceneSearcher searcher)
        {
            // Upper bound on the number of distinct words that get their own term/wildcard/fuzzy clauses.
            const int maxIndividualTerms = 20;

            var criteria = (LuceneSearchCriteria)searcher.CreateSearchCriteria();

            // Check if there's anything to process
            if (NodeTypeAlias.IsNullOrWhiteSpace() && Term.IsNullOrWhiteSpace() && !Filters.Any())
            {
                return null;
            }

            var sb = new StringBuilder();

            if (!string.IsNullOrEmpty(NodeTypeAlias))
            {
                sb.AppendFormat("+nodeTypeAlias: {0} ", NodeTypeAlias);
            }

            // Version filtering:
            // * MajorDocsVersion supplied: filter by that version.
            // * MajorDocsVersion not supplied: filter by the configured current major version.
            // * Searching all versions at once is currently not implemented.
            var currentMajorVersions = new[] { "6", "7", "8", "9" };

            string versionToFilterBy = MajorDocsVersion == null
                ? ConfigurationManager.AppSettings[Constants.AppSettings.DocumentationCurrentMajorVersion]
                : MajorDocsVersion.ToString();

            // Documentation requires the selected version; every other node type instead
            // excludes all the known major versions that were not selected.
            var isDocumentation = NodeTypeAlias.InvariantEquals("documentation");
            foreach (var version in currentMajorVersions)
            {
                var isSelected = version == versionToFilterBy;
                if (isDocumentation)
                {
                    if (isSelected)
                    {
                        sb.AppendFormat("+majorVersion:{0} ", version);
                    }
                }
                else if (!isSelected)
                {
                    sb.AppendFormat("-majorVersion:{0} ", version);
                }
            }

            if (!string.IsNullOrEmpty(Term))
            {
                sb.Append("+(");

                // Cleanup the term so there are no errors
                Term = Term
                       .Replace("\"", string.Empty)
                       .Replace(":", string.Empty)
                       .Replace("\\", string.Empty)
                       .Trim('*');
                // Remove OR operators (matched case-insensitively)
                Term = Regex.Replace(Term, @" OR ", " ", RegexOptions.IgnoreCase);
                // Replace runs of whitespace with a single space as they were giving errors
                Term = Regex.Replace(Term, @"\s{2,}", " ");

                // Do an exact phrase match with boost
                sb.AppendFormat("nodeName:\"{0}\"^20000 body:\"{0}\"^5000 ", Term);

                // Search the YAML derived fields
                sb.AppendFormat("tags:\"{0}\"^20000 ", Term);
                sb.AppendFormat("keywords:\"{0}\"^20000 ", Term);

                // Now we need to split the phrase into individual terms so the query parser can understand
                var split = Term.Split(new[] { " " }, StringSplitOptions.RemoveEmptyEntries);

                // De-duplicate the terms (case-insensitively, keeping first-seen order), drop
                // single characters and drop stop words. The stop word set is lower case, so the
                // word is lower-cased for that lookup; otherwise "The" or "AND" would slip through.
                var seen    = new HashSet<string>(StringComparer.InvariantCultureIgnoreCase);
                var deduped = new List<string>();
                foreach (var word in split)
                {
                    if (deduped.Count == maxIndividualTerms)
                    {
                        // Truncate, we don't want to search on all of these individually
                        break;
                    }

                    if (word.Length > 1 &&
                        !StandardAnalyzer.STOP_WORDS_SET.Contains(word.ToLowerInvariant()) &&
                        seen.Add(word))
                    {
                        deduped.Add(word);
                    }
                }

                // One pass per clause type so the clauses stay grouped in the raw query:
                // standard match with boost, then suffix wildcard, then fuzzy (close match 0.9).
                var perTermFormats = new[]
                {
                    "nodeName:{0}^10000 body:{0}^50 ",
                    "nodeName:{0}*^1000 body:{0}* ",
                    "nodeName:{0}~0.9^0.1 body:{0}~0.9^0.1 "
                };
                foreach (var format in perTermFormats)
                {
                    foreach (var word in deduped)
                    {
                        sb.AppendFormat(format, word);
                    }
                }

                sb.Append(")");
            }

            // Only apply a raw query when something was actually generated above
            if (sb.Length > 0)
            {
                criteria = (LuceneSearchCriteria)criteria.RawQuery(sb.ToString());
            }

            // Now we can apply any filters, this is done by using native Lucene query objects
            if (Filters.Any())
            {
                // If there is a filter applied to the entire result then add it here, this is a MUST sub query
                foreach (var filter in Filters)
                {
                    filter.ProcessLuceneAddFilters(searcher, criteria);
                }

                // Excludes are processed after the includes - you can only exclude after you've included
                foreach (var filter in Filters)
                {
                    filter.ProcessLuceneExcludeFilters(searcher, criteria);
                }
            }

            if (!string.IsNullOrEmpty(OrderBy))
            {
                criteria.OrderByDescending(OrderBy);
            }

            return criteria;
        }
        /// <summary>
        /// Builds an ISearchCriteria that looks for every word of <paramref name="searchText"/>
        /// in all search fields of a <see cref="BaseLuceneSearcher"/>.
        /// </summary>
        /// <param name="searchText">Text to search for; it is split on spaces into individual words.</param>
        /// <param name="useWildcards">When true each word is searched as a multiple-character wildcard value.</param>
        /// <param name="searcher">Searcher that creates the criteria and supplies the search fields.</param>
        /// <returns>The compiled grouped-or criteria.</returns>
        /// <remarks>
        /// Reflection is used because GetSearchFields is not public in Examine.
        /// </remarks>
        private ISearchCriteria SearchAllFields(string searchText, bool useWildcards, BaseLuceneSearcher searcher)
        {
            var criteria = searcher.CreateSearchCriteria();

            // Look up BaseLuceneSearcher.GetSearchFields once and keep the MethodInfo for later calls.
            if (_examineGetSearchFields == null)
            {
                const BindingFlags anyMember = BindingFlags.Instance | BindingFlags.Public | BindingFlags.NonPublic | BindingFlags.Static;
                _examineGetSearchFields = typeof(BaseLuceneSearcher).GetMethod("GetSearchFields", anyMember);
            }

            // Invoke GetSearchFields() on the searcher via reflection.
            var fields = (IEnumerable <string>)_examineGetSearchFields.Invoke(searcher, null);

            // Same approach Examine uses internally: one value per space-separated word.
            var words = searchText.Split(new[] { ' ' }, StringSplitOptions.RemoveEmptyEntries);

            if (!useWildcards)
            {
                return criteria.GroupedOr(fields, words).Compile();
            }

            var wildcardValues = words
                .Select(x => new CustomExamineValue(Examineness.ComplexWildcard, x.MultipleCharacterWildcard().Value))
                .ToArray <IExamineValue>();

            return criteria.GroupedOr(fields, wildcardValues).Compile();
        }
Beispiel #9
0
        /// <summary>
        /// Translates the current <c>NodeTypeAlias</c>, <c>Term</c>, <c>Filters</c> and
        /// <c>OrderBy</c> values into Lucene search criteria.
        /// </summary>
        /// <param name="searcher">Searcher that creates the criteria and is handed to every filter.</param>
        /// <returns>
        /// The criteria, or <c>null</c> when no node type alias, term or filter is set.
        /// </returns>
        /// <remarks>
        /// <c>Term</c> is sanitised in place, so the property holds the cleaned-up value afterwards.
        /// </remarks>
        public ISearchCriteria GetSearchCriteria(BaseLuceneSearcher searcher)
        {
            var criteria = (LuceneSearchCriteria)searcher.CreateSearchCriteria();

            // Bail out when there is nothing at all to search on.
            var nothingToProcess = NodeTypeAlias.IsNullOrWhiteSpace() && Term.IsNullOrWhiteSpace() && !Filters.Any();
            if (nothingToProcess)
            {
                return null;
            }

            if (!string.IsNullOrEmpty(NodeTypeAlias))
            {
                criteria.Field("nodeTypeAlias", NodeTypeAlias);
            }

            var rawQuery = new StringBuilder();

            if (!string.IsNullOrEmpty(Term))
            {
                // Strip the characters that would break the query parser.
                Term = Term
                       .Replace("\"", string.Empty)
                       .Replace(":", string.Empty)
                       .Replace("\\", string.Empty)
                       .Trim('*');
                // Drop OR operators, matched case-insensitively.
                Term = Regex.Replace(Term, @" OR ", " ", RegexOptions.IgnoreCase);
                // Collapse runs of whitespace, which were giving errors.
                Term = Regex.Replace(Term, @"\s{2,}", " ");

                // Individual words, so the query parser can handle each one.
                var words = Term.Split(new[] { " " }, StringSplitOptions.RemoveEmptyEntries);

                // A phrase clause is only emitted when there is more than one word.
                if (words.Length > 1)
                {
                    rawQuery.AppendFormat("nodeName:\"{0}\"^20000 body:\"{0}\"^5000 ", Term);
                }

                // One pass per clause type keeps the clauses grouped in the raw query:
                // boosted standard match, then suffix wildcard, then fuzzy (close match 0.9).
                var perWordFormats = new[]
                {
                    "nodeName:{0}^10000 body:{0}^50 ",
                    "nodeName:{0}*^1000 body:{0}* ",
                    "nodeName:{0}~0.9^0.1 body:{0}~0.9^0.1 "
                };
                foreach (var format in perWordFormats)
                {
                    foreach (var word in words)
                    {
                        rawQuery.AppendFormat(format, word);
                    }
                }
            }

            // Apply the raw query only when something was generated above.
            if (rawQuery.Length > 0)
            {
                criteria = (LuceneSearchCriteria)criteria.RawQuery(rawQuery.ToString());
            }

            // Filters are applied through native Lucene query objects.
            if (Filters.Any())
            {
                // Includes first ...
                foreach (var include in Filters)
                {
                    include.ProcessLuceneAddFilters(searcher, criteria);
                }

                // ... then excludes, since you can only exclude after you've included.
                foreach (var exclude in Filters)
                {
                    exclude.ProcessLuceneExcludeFilters(searcher, criteria);
                }
            }

            if (!string.IsNullOrEmpty(OrderBy))
            {
                criteria.OrderByDescending(OrderBy);
            }

            return criteria;
        }