/// <summary>
        /// Builds one <see cref="IndexItem"/> for the given html content per page on which
        /// its module is published and hands each to <c>IndexHelper.RebuildIndex</c>.
        /// Pending pages are skipped. Does nothing when the "DisableSearchIndex" setting
        /// is true or when <paramref name="content"/> is null.
        /// </summary>
        /// <param name="content">The html content instance to index.</param>
        private static void IndexItem(HtmlContent content)
        {
            if (ConfigHelper.GetBoolProperty("DisableSearchIndex", false))
            {
                return;
            }

            // nothing to index; every statement below dereferences content
            if (content == null)
            {
                return;
            }

            Guid htmlFeatureGuid
                = new Guid("113FB01C-6408-4607-B0F7-1379E2512396");
            ModuleDefinition htmlFeature
                = new ModuleDefinition(htmlFeatureGuid);
            Module module = new Module(content.ModuleId);

            // generally we should not include the page meta because it can result in duplicate results,
            // one for each instance of html content on the page, because they all use the same page meta.
            // since page meta should reflect the content of the page it is sufficient to just index the content.
            // The setting does not change per page, so it is read once instead of twice per iteration.
            bool indexPageMeta = (ConfigurationManager.AppSettings["IndexPageMeta"] == "true");

            // get list of pages where this module is published
            List<PageModule> pageModules
                = PageModule.GetPageModulesByModule(content.ModuleId);

            foreach (PageModule pageModule in pageModules)
            {
                PageSettings pageSettings
                    = new PageSettings(
                          content.SiteId,
                          pageModule.PageId);

                //don't index pending/unpublished pages
                if (pageSettings.IsPending)
                {
                    continue;
                }

                IndexItem indexItem = new IndexItem();

                // only override the index path when the content carries one (null-safe)
                if (!string.IsNullOrEmpty(content.SearchIndexPath))
                {
                    indexItem.IndexPath = content.SearchIndexPath;
                }

                indexItem.SiteId          = content.SiteId;
                indexItem.PageId          = pageModule.PageId;
                indexItem.PageName        = pageSettings.PageName;
                indexItem.ViewRoles       = pageSettings.AuthorizedRoles;
                indexItem.ModuleViewRoles = module.ViewRoles;

                if (pageSettings.UseUrl)
                {
                    indexItem.ViewPage             = pageSettings.Url.Replace("~/", string.Empty);
                    indexItem.UseQueryStringParams = false;
                }

                if (indexPageMeta)
                {
                    indexItem.PageMetaDescription = pageSettings.PageMetaDescription;
                    indexItem.PageMetaKeywords    = pageSettings.PageMetaKeyWords;
                }

                indexItem.FeatureId           = htmlFeatureGuid.ToString();
                indexItem.FeatureName         = htmlFeature.FeatureName;
                indexItem.FeatureResourceFile = htmlFeature.ResourceFile;

                indexItem.ItemId           = content.ItemId;
                indexItem.ModuleId         = content.ModuleId;
                indexItem.ModuleTitle      = module.ModuleTitle;
                indexItem.Title            = content.Title;
                indexItem.Content          = SecurityHelper.RemoveMarkup(content.Body);
                indexItem.PublishBeginDate = pageModule.PublishBeginDate;
                indexItem.PublishEndDate   = pageModule.PublishEndDate;

                IndexHelper.RebuildIndex(indexItem);
            }

            log.Debug("Indexed " + content.Title);
        }
        /// <summary>
        /// Re-indexes every html content row found on the given page by building an
        /// <see cref="IndexItem"/> per row and passing it to <c>IndexHelper.RebuildIndex</c>.
        /// Returns without doing anything when the "DisableSearchIndex" setting is true,
        /// when <paramref name="pageSettings"/> is null (logged as an error) or when the page is pending.
        /// A <see cref="System.Data.Common.DbException"/> raised while indexing is logged and swallowed.
        /// </summary>
        /// <param name="pageSettings">The page whose html content should be indexed.</param>
        /// <param name="indexPath">Index path forwarded unchanged to <c>IndexHelper.RebuildIndex</c>.</param>
        public override void RebuildIndex(
            PageSettings pageSettings,
            string indexPath)
        {
            if (ConfigHelper.GetBoolProperty("DisableSearchIndex", false)) { return; }

            if (pageSettings == null)
            {
                log.Error("pageSettings passed in to HtmlContentIndexBuilderProvider.RebuildIndex was null");
                return;
            }

            // pending/unpublished pages are never indexed
            if (pageSettings.IsPending) { return; }

            log.Info("HtmlContentIndexBuilderProvider indexing page - " + pageSettings.PageName);

            try
            {
                Guid featureGuid = new Guid("113FB01C-6408-4607-B0F7-1379E2512396");
                ModuleDefinition feature = new ModuleDefinition(featureGuid);

                List<PageModule> modulesOnPage = PageModule.GetPageModulesByPage(pageSettings.PageId);

                HtmlRepository htmlRepository = new HtmlRepository();
                DataTable htmlRows = htmlRepository.GetHtmlContentByPage(pageSettings.SiteId, pageSettings.PageId);

                foreach (DataRow htmlRow in htmlRows.Rows)
                {
                    IndexItem entry = new IndexItem();

                    // page level data
                    entry.SiteId   = pageSettings.SiteId;
                    entry.PageId   = pageSettings.PageId;
                    entry.PageName = pageSettings.PageName;

                    // Page meta is opt-in: every html instance on a page shares the same meta,
                    // so indexing it tends to produce one duplicate result per instance.
                    string indexPageMetaSetting = ConfigurationManager.AppSettings["IndexPageMeta"];
                    if (indexPageMetaSetting == "true")
                    {
                        entry.PageMetaDescription = pageSettings.PageMetaDescription;
                        entry.PageMetaKeywords    = pageSettings.PageMetaKeyWords;
                    }

                    entry.ViewRoles       = pageSettings.AuthorizedRoles;
                    entry.ModuleViewRoles = htmlRow["ViewRoles"].ToString();

                    if (pageSettings.UseUrl)
                    {
                        entry.ViewPage             = pageSettings.Url.Replace("~/", string.Empty);
                        entry.UseQueryStringParams = false;
                    }

                    // feature level data
                    entry.FeatureId           = featureGuid.ToString();
                    entry.FeatureName         = feature.FeatureName;
                    entry.FeatureResourceFile = feature.ResourceFile;

                    // row level data
                    entry.ItemId      = Convert.ToInt32(htmlRow["ItemID"]);
                    entry.ModuleId    = Convert.ToInt32(htmlRow["ModuleID"]);
                    entry.ModuleTitle = htmlRow["ModuleTitle"].ToString();
                    entry.Title       = htmlRow["Title"].ToString();

                    // markup is stripped (since 2010-01-30) because javascript fragments such as ]]>
                    // were appearing in search results when the content contained javascript
                    entry.Content = SecurityHelper.RemoveMarkup(htmlRow["Body"].ToString());

                    // copy the publish dates from the page module(s) matching this row's module;
                    // when several match, the last one wins
                    foreach (PageModule candidate in modulesOnPage)
                    {
                        if (entry.ModuleId != candidate.ModuleId) { continue; }

                        entry.PublishBeginDate = candidate.PublishBeginDate;
                        entry.PublishEndDate   = candidate.PublishEndDate;
                    }

                    IndexHelper.RebuildIndex(entry, indexPath);

                    log.Debug("Indexed " + entry.Title);
                }
            }
            catch (System.Data.Common.DbException ex)
            {
                log.Error(ex);
            }
        }
Beispiel #3
0
        /// <summary>
        /// Converts an <see cref="IndexItem"/> into the Lucene <c>Document</c> that is written to the index.
        /// Identifier, role, date and keyword fields are stored and indexed untokenized; page name,
        /// titles, meta description and the combined contents are tokenized with term vectors;
        /// the remaining presentation fields are stored without being indexed.
        /// </summary>
        /// <param name="indexItem">The item to convert.</param>
        /// <returns>The populated document.</returns>
        private Document GetDocument(IndexItem indexItem)
        {
            // names for the flag combinations that repeat throughout this method
            Field.Store      stored     = Field.Store.YES;
            Field.Index      exact      = Field.Index.UN_TOKENIZED;
            Field.Index      analyzed   = Field.Index.TOKENIZED;
            Field.Index      notIndexed = Field.Index.NO;
            Field.TermVector vectors    = Field.TermVector.YES;
            CultureInfo      invariant  = CultureInfo.InvariantCulture;

            Document document = new Document();

            // ---- searchable fields ----
            document.Add(new Field("Key", indexItem.Key, stored, exact));
            document.Add(new Field("SiteID", indexItem.SiteId.ToString(invariant), stored, exact));
            document.Add(new Field("ViewRoles", indexItem.ViewRoles, stored, notIndexed));

            // one "Role" field per non-empty page role
            foreach (string pageRole in indexItem.ViewRoles.Split(';'))
            {
                if (pageRole.Length == 0) { continue; }
                document.Add(new Field("Role", pageRole, stored, exact));
            }

            // one "ModuleRole" field per non-empty module role
            foreach (string moduleRole in indexItem.ModuleViewRoles.Split(';'))
            {
                if (moduleRole.Length == 0) { continue; }
                document.Add(new Field("ModuleRole", moduleRole, stored, exact));
            }

            document.Add(new Field("FeatureId", indexItem.FeatureId, stored, exact));
            document.Add(new Field("PageID", indexItem.PageId.ToString(invariant), stored, exact));
            document.Add(new Field("ModuleID", indexItem.ModuleId.ToString(invariant), stored, exact));
            document.Add(new Field("ItemID", indexItem.ItemId.ToString(invariant), stored, exact));

            // dates use the sortable "s" format
            document.Add(new Field("PublishBeginDate", indexItem.PublishBeginDate.ToString("s"), stored, exact));
            document.Add(new Field("PublishEndDate", indexItem.PublishEndDate.ToString("s"), stored, exact));
            document.Add(new Field("IndexedUtc", DateTime.UtcNow.ToString("s"), stored, exact));

            document.Add(new Field("PageName", indexItem.PageName, stored, analyzed, vectors));
            document.Add(new Field("ModuleTitle", indexItem.ModuleTitle, stored, analyzed, vectors));
            document.Add(new Field("Title", indexItem.Title, stored, analyzed, vectors));
            document.Add(new Field("PageMetaDesc", indexItem.PageMetaDescription, stored, analyzed, vectors));

            // one "Keyword" field per non-blank, trimmed meta keyword
            foreach (string rawKeyword in indexItem.PageMetaKeywords.Split(','))
            {
                if (rawKeyword.Trim().Length == 0) { continue; }
                document.Add(new Field("Keyword", rawKeyword.Trim(), stored, exact));
            }

            string plainText = ConvertToText(indexItem.Content);

            // short text is used as the intro verbatim, longer text is excerpted and gets an ellipsis
            string intro;
            if (plainText.Length < 100)
            {
                intro = plainText;
            }
            else
            {
                intro = UIHelper.CreateExcerpt(plainText, 97) + "...";
            }
            document.Add(new Field("Intro", intro, stored, exact));

            // other content is optional, used for blog comments, could be used elsewhere.
            // The combined text is only stored when it is needed for result highlighting.
            Field.Store contentsStore = storeContentForResultsHighlighting ? Field.Store.YES : Field.Store.NO;
            string combinedText = plainText + " " + ConvertToText(indexItem.OtherContent);
            document.Add(new Field("contents", combinedText, contentsStore, analyzed, vectors));

            // ---- unsearchable fields ----
            document.Add(new Field("Feature", indexItem.FeatureName, stored, notIndexed));
            document.Add(new Field("FeatureResourceFile", indexItem.FeatureResourceFile, stored, notIndexed));
            document.Add(new Field("PageNumber", indexItem.PageNumber.ToString(invariant), stored, notIndexed));
            document.Add(new Field("ViewPage", indexItem.ViewPage, stored, notIndexed));
            document.Add(new Field("UseQueryStringParams", indexItem.UseQueryStringParams.ToString(), stored, notIndexed));
            document.Add(new Field("QueryStringAddendum", indexItem.QueryStringAddendum, stored, notIndexed));

            return document;
        }
Beispiel #4
0
        /// <summary>
        /// Applies a batch of indexing queue rows to the index at <paramref name="indexPath"/>.
        /// First every row's key is deleted from the index through an <c>IndexReader</c>;
        /// rows flagged "RemoveOnly" are then removed from the queue. Next the remaining rows
        /// are deserialized, written through an <c>IndexWriter</c> and removed from the queue.
        /// Finally the writer is optimized and closed.
        /// </summary>
        /// <remarks>
        /// The reader and the writer are closed in finally blocks so that an unexpected
        /// exception while processing a row cannot leave them open.
        /// </remarks>
        /// <param name="q">Queue rows; read columns are ItemKey, RemoveOnly, RowId and SerializedItem.</param>
        /// <param name="indexPath">Path of the index to update.</param>
        private void ProcessQueue(DataTable q, string indexPath)
        {
            rowsProcessed = 0;
            rowsToProcess = q.Rows.Count;

            // first process deletes with reader
            try
            {
                IndexReader reader = IndexReader.Open(indexPath);
                try
                {
                    foreach (DataRow row in q.Rows)
                    {
                        string itemKey = row["ItemKey"].ToString();
                        Term term = new Term("Key", itemKey);
                        try
                        {
                            reader.DeleteDocuments(term);
                            log.Debug("reader.DeleteDocuments(term) for Key " + itemKey);
                        }
                        catch (Exception ge)
                        {
                            // TODO: monitor what real exceptions if any occur and then
                            // change this catch to catch only the expected ones
                            // instead of non specific exception
                            log.Error(ge);
                        }

                        bool removeOnly = Convert.ToBoolean(row["RemoveOnly"]);
                        if (removeOnly)
                        {
                            Int64 rowId = Convert.ToInt64(row["RowId"]);
                            IndexingQueue.Delete(rowId);
                        }

                        if (DateTime.UtcNow > nextStatusUpdateTime)
                        {
                            // don't mark as complete because there may be more queue items
                            // for different index paths in a multi site installation
                            ReportStatus(false);
                        }
                    }
                }
                finally
                {
                    // always release the reader, even when a row could not be processed;
                    // an IOException raised here is still handled by the catch below
                    reader.Close();
                }
            }
            catch (IOException ex)
            {
                log.Info("IndexWriter swallowed exception this is not unexpected if building or rebuilding the search index ", ex);
                errorCount += 1;
            }
            catch (TypeInitializationException ex)
            {
                log.Info("IndexWriter swallowed exception ", ex);
                errorCount += 1;
            }

            // next add items with writer
            IndexWriter indexWriter = GetWriter(indexPath);

            if (indexWriter == null)
            {
                log.Error("failed to get IndexWriter for path: " + indexPath);
                errorCount += 1;
                return;
            }

            try
            {
                foreach (DataRow row in q.Rows)
                {
                    bool removeOnly = Convert.ToBoolean(row["RemoveOnly"]);
                    if (!removeOnly)
                    {
                        try
                        {
                            IndexItem indexItem
                                = (IndexItem)SerializationHelper.DeserializeFromString(typeof(IndexItem), row["SerializedItem"].ToString());

                            Document doc = GetDocument(indexItem);
                            WriteToIndex(doc, indexWriter);
                            log.Debug("called WriteToIndex(doc, indexWriter) for key " + indexItem.Key);

                            // the row is only removed from the queue once it has been written
                            Int64 rowId = Convert.ToInt64(row["RowId"]);
                            IndexingQueue.Delete(rowId);
                        }
                        catch (Exception ex)
                        {
                            log.Error(ex);
                        }
                    }

                    if (DateTime.UtcNow > nextStatusUpdateTime)
                    {
                        // don't mark as complete because there may be more queue items
                        // for different index paths in a multi site installation
                        ReportStatus(false);
                    }
                }

                try
                {
                    indexWriter.Optimize();
                }
                catch (IOException ex)
                {
                    log.Error(ex);
                }
            }
            finally
            {
                // always release the writer, otherwise it stays open after an unexpected failure
                try
                {
                    indexWriter.Close();
                }
                catch (IOException ex)
                {
                    log.Error(ex);
                }
            }
        }
 /// <summary>
 /// Removes the given item from the underlying list.
 /// </summary>
 /// <param name="item">The item to remove.</param>
 public void Remove(IndexItem item)
 {
     List.Remove(item);
 }
 /// <summary>
 /// Appends the given item to the underlying list.
 /// </summary>
 /// <param name="item">The item to add.</param>
 public void Add(IndexItem item)
 {
     List.Add(item);
 }
        /// <summary>
        /// Searches a site's index for <paramref name="queryText"/> across the Title, ModuleTitle,
        /// contents, PageName, PageMetaDesc and Keyword fields, optionally restricted to a set of
        /// features, and returns the requested page of results.
        /// </summary>
        /// <remarks>
        /// Two app settings change the behavior. Unless "SearchUseBackwardCompatibilityMode" is "false",
        /// role and publish date filtering is applied to the current page of hits after the query ran,
        /// so <paramref name="totalHits"/> is only corrected for the hits inspected on that page.
        /// Otherwise filtering is part of the query. "SearchIncludeModuleRoleFilters" = "true" adds
        /// module role filters for non-admins.
        /// </remarks>
        /// <param name="siteId">Site whose index path is resolved with <c>GetIndexPath</c>.</param>
        /// <param name="isAdmin">When true no role filtering is applied.</param>
        /// <param name="userRoles">Roles passed to the role filter queries for non-admin users.</param>
        /// <param name="queryText">The query; a null or empty value returns an empty collection.</param>
        /// <param name="highlightResults">
        /// When true (and not in backward compatibility mode) each result's Intro is replaced by the
        /// best highlighted fragment of the hit's stored "contents" field, if that field is available.
        /// </param>
        /// <param name="highlightedFragmentSize">Fragment size given to the highlighter's fragmenter.</param>
        /// <param name="pageNumber">Page to return; values of 1 or less start at the first hit.</param>
        /// <param name="pageSize">Number of hits per page.</param>
        /// <param name="totalHits">Receives the number of hits (see remarks for backward compatibility mode).</param>
        /// <param name="invalidQuery">Set to true when the query could not be parsed or produced too many clauses.</param>
        /// <param name="moduleIDs">
        /// Optional feature guids matched against the "FeatureId" field; <c>Guid.Empty</c> entries are ignored.
        /// </param>
        /// <returns>The results for the requested page; never null.</returns>
        public static IndexItemCollection Search(
            int siteId,
            bool isAdmin,
            List <string> userRoles,
            string queryText,
            bool highlightResults,
            int highlightedFragmentSize,
            int pageNumber,
            int pageSize,
            out int totalHits,
            out bool invalidQuery,
            params Guid[] moduleIDs
            )
        {
            invalidQuery = false;
            totalHits    = 0;
            string indexPath            = GetIndexPath(siteId);
            IndexItemCollection results = new IndexItemCollection();

            if (string.IsNullOrEmpty(queryText))
            {
                return results;
            }

            // backward compatibility mode is on unless explicitly switched off
            bool useBackwardCompatibilityMode
                = (ConfigurationManager.AppSettings["SearchUseBackwardCompatibilityMode"] != "false");

            // module role filters are off unless explicitly switched on
            bool includeModuleRoleFilters
                = (ConfigurationManager.AppSettings["SearchIncludeModuleRoleFilters"] == "true");

            if (!IndexReader.IndexExists(indexPath))
            {
                return results;
            }

            if (log.IsDebugEnabled)
            {
                log.Debug("Entered Search, indexPath = " + indexPath);
            }

            // UTC so that the elapsed time is not distorted by a local clock (DST) change
            long startTicks = DateTime.UtcNow.Ticks;

            IndexSearcher searcher = null;

            try
            {
                BooleanQuery mainQuery = new BooleanQuery();

                if ((!isAdmin) && (!useBackwardCompatibilityMode))
                {
                    AddRoleQueries(userRoles, mainQuery);
                }

                if ((!isAdmin) && (includeModuleRoleFilters))
                {
                    AddModuleRoleQueries(userRoles, mainQuery);
                }

                // wildcards are stripped for the Keyword field only
                Query multiQuery = MultiFieldQueryParser.Parse(
                    new string[] { queryText, queryText, queryText, queryText, queryText, queryText.Replace("*", string.Empty) },
                    new string[] { "Title", "ModuleTitle", "contents", "PageName", "PageMetaDesc", "Keyword" },
                    new StandardAnalyzer());

                mainQuery.Add(multiQuery, BooleanClause.Occur.MUST);

                if (!useBackwardCompatibilityMode)
                {
                    // only items whose publish window contains the current time
                    Term       beginDateStart = new Term("PublishBeginDate", DateTime.MinValue.ToString("s"));
                    Term       beginDateEnd   = new Term("PublishBeginDate", DateTime.UtcNow.ToString("s"));
                    RangeQuery beginDateQuery = new RangeQuery(beginDateStart, beginDateEnd, true);
                    mainQuery.Add(beginDateQuery, BooleanClause.Occur.MUST);

                    Term       endDateStart = new Term("PublishEndDate", DateTime.UtcNow.ToString("s"));
                    Term       endDateEnd   = new Term("PublishEndDate", DateTime.MaxValue.ToString("s"));
                    RangeQuery endDateQuery = new RangeQuery(endDateStart, endDateEnd, true);
                    mainQuery.Add(endDateQuery, BooleanClause.Occur.MUST);
                }

                if (moduleIDs != null && moduleIDs.Length > 0)
                {
                    // a hit must belong to at least one of the requested features
                    BooleanQuery featureFilter = new BooleanQuery();
                    foreach (Guid featureGuid in moduleIDs)
                    {
                        if (featureGuid != Guid.Empty)
                        {
                            featureFilter.Add(new TermQuery(new Term("FeatureId", featureGuid.ToString())), BooleanClause.Occur.SHOULD);
                        }
                    }

                    if (featureFilter.Clauses().Count > 0)
                    {
                        mainQuery.Add(featureFilter, BooleanClause.Occur.MUST);
                    }
                }

                searcher = new IndexSearcher(indexPath);
                // a 0 based collection
                Hits hits = searcher.Search(mainQuery);

                int startHit = 0;
                if (pageNumber > 1)
                {
                    startHit = ((pageNumber - 1) * pageSize);
                }

                totalHits = hits.Length();
                int end = startHit + pageSize;
                if (totalHits <= end)
                {
                    end = totalHits;
                }

                int itemsAdded = 0;
                int itemsToAdd = end;

                // in backward compatibility mode, if multiple pages of results are found, we may not be showing
                // every user the correct number of hits they can see, as we only filter the current page.
                // totalHits is decremented while filtering, so keep the original count.
                int actualHits = totalHits;

                if (!useBackwardCompatibilityMode)
                {
                    // all filtering is done by the query so the hit count is true, whereas in
                    // backward compatibility mode results may be filtered out after the query returned.
                    QueryScorer scorer      = new QueryScorer(multiQuery);
                    Formatter   formatter   = new SimpleHTMLFormatter("<span class='searchterm'>", "</span>");
                    Highlighter highlighter = new Highlighter(formatter, scorer);
                    highlighter.SetTextFragmenter(new SimpleFragmenter(highlightedFragmentSize));

                    for (int i = startHit; i < itemsToAdd; i++)
                    {
                        // fetch the hit document once instead of once per field access
                        var hitDoc = hits.Doc(i);
                        IndexItem indexItem = new IndexItem(hitDoc, hits.Score(i));

                        if (highlightResults)
                        {
                            // Get returns null when the contents field was not stored in the index;
                            // StringReader would throw ArgumentNullException on that, so skip highlighting.
                            string storedContents = hitDoc.Get("contents");

                            if (!string.IsNullOrEmpty(storedContents))
                            {
                                try
                                {
                                    TokenStream stream = new StandardAnalyzer().TokenStream("contents", new StringReader(storedContents));

                                    string highlightedResult = highlighter.GetBestFragment(stream, storedContents);
                                    if (highlightedResult != null)
                                    {
                                        indexItem.Intro = highlightedResult;
                                    }
                                }
                                catch (NullReferenceException)
                                {
                                    // highlighting is best effort: keep the item's default intro
                                }
                            }
                        }

                        results.Add(indexItem);
                        itemsAdded += 1;
                    }
                }
                else
                {
                    // backward compatible with old indexes: filter by role and publish dates here
                    for (int i = startHit; i < itemsToAdd; i++)
                    {
                        var hitDoc = hits.Doc(i);
                        bool needToDecrementTotalHits = false;

                        if (
                            (isAdmin) ||
                            (WebUser.IsContentAdmin) ||
                            (WebUser.IsInRoles(hitDoc.Get("ViewRoles")))
                            )
                        {
                            IndexItem indexItem = new IndexItem(hitDoc, hits.Score(i));

                            if (
                                (DateTime.UtcNow > indexItem.PublishBeginDate) &&
                                (DateTime.UtcNow < indexItem.PublishEndDate)
                                )
                            {
                                results.Add(indexItem);
                                // count the item so that ItemCount reflects what was returned
                                itemsAdded += 1;
                            }
                            else
                            {
                                needToDecrementTotalHits = true;
                            }
                        }
                        else
                        {
                            needToDecrementTotalHits = true;
                        }

                        // filtered out a result so need to decrement
                        if (needToDecrementTotalHits)
                        {
                            totalHits -= 1;

                            // we are not getting as many results as the page size, so look one hit
                            // further, but never beyond the last hit actually returned by the query
                            if (itemsToAdd < actualHits)
                            {
                                itemsToAdd += 1;
                            }
                        }
                    }
                }

                results.ItemCount = itemsAdded;
                results.PageIndex = pageNumber;

                results.ExecutionTime = DateTime.UtcNow.Ticks - startTicks;
            }
            catch (ParseException ex)
            {
                invalidQuery = true;
                log.Error("handled error for search terms " + queryText, ex);
                // these parser exceptions are generally caused by
                // spambots posting too much junk into the search form
                // here's an option to automatically ban the ip address
                HandleSpam(queryText, ex);
            }
            catch (BooleanQuery.TooManyClauses ex)
            {
                invalidQuery = true;
                log.Error("handled error for search terms " + queryText, ex);
            }
            finally
            {
                // release the searcher on every path, including the handled query errors above
                if (searcher != null)
                {
                    searcher.Close();
                }
            }

            return results;
        }