Примеры использования IndexContainer.IndexDocument в C# (CSharp)

Язык программирования: C# (CSharp)

Класс/Тип: IndexContainer

Метод/Функция: IndexDocument

Примеров на hotexamples.com: 6

C# (CSharp) IndexContainer.IndexDocument — найдено 6 примеров. Это лучшие примеры кода на C# (CSharp) для IndexContainer.IndexDocument, взятые из проектов с открытым исходным кодом. Вы можете оценить каждый пример, чтобы помочь нам улучшить качество подборки.

Основные методы

Показать Скрыть

Save(7)

IndexDocument(6)

GetActiveComponent(6)

CreateIndex(5)

IndexDocuments(5)

DeleteDocumentsByType(4)

DeleteIndex(4)

GetSmallestBundle(4)

DeleteDocument(2)

DeleteDocumentById(2)

DeleteDocumentByProperty(2)

FNV1a64Hash(2)

GetIndexList(1)

Пример #1

Показать файл

        /// <summary>
        /// Crawls a page: downloads it, indexes it unless its robots meta tag contains
        /// "noindex", and then recursively crawls every link found on the page.
        /// Pages that were not indexed do not have their links followed.
        /// </summary>
        /// <param name="url">The url to crawl.</param>
        private void CrawlPage(string url)
        {
            // clean up the url a bit
            url = StandardizeUrl(url);

            try
            {
                // skip pages already seen, pages disallowed for this user agent, and pages outside the base url
                if (PageHasBeenCrawled(url) || !_robotHelper.IsPathAllowed(_userAgent, url) || !url.StartsWith(_baseUrl))
                {
                    return;
                }

                // Record the url before downloading it so that an empty, failing or "noindex" page
                // is not fetched again each time another page links to it.
                // NOTE(review): assumes PageHasBeenCrawled consults _previouslyCrawledPages - confirm.
                _previouslyCrawledPages.Add(url);

                string rawPage = GetWebText(url);
                if (string.IsNullOrWhiteSpace(rawPage))
                {
                    return;
                }

                var htmlDoc = new HtmlDocument();
                htmlDoc.LoadHtml(rawPage);

                // The standard meta tag name is "robots" (plural); a page opts out of indexing
                // by listing "noindex" in that tag's content attribute.
                HtmlNode metaRobots = htmlDoc.DocumentNode.SelectSingleNode("//meta[@name='robots']");
                if (metaRobots != null && metaRobots.Attributes["content"] != null && metaRobots.Attributes["content"].Value.Contains("noindex"))
                {
                    return;
                }

                // index the page
                SitePageIndex sitePage = new SitePageIndex();

                sitePage.Content             = GetPageText(htmlDoc);
                sitePage.Url                 = url;
                sitePage.Id                  = url.MakeInt64HashCode();
                sitePage.SourceIndexModel    = "Rock.Model.Site";
                sitePage.PageTitle           = GetPageTitle(htmlDoc, url);
                sitePage.DocumentName        = sitePage.PageTitle;
                sitePage.SiteName            = _site.Name;
                sitePage.SiteId              = _site.Id;
                sitePage.LastIndexedDateTime = RockDateTime.Now;

                // the summary comes from the description meta tag, when present
                HtmlNode metaDescription = htmlDoc.DocumentNode.SelectSingleNode("//meta[@name='description']");
                if (metaDescription != null && metaDescription.Attributes["content"] != null)
                {
                    sitePage.PageSummary = metaDescription.Attributes["content"].Value;
                }

                // the keywords come from the keywords meta tag, when present
                HtmlNode metaKeywords = htmlDoc.DocumentNode.SelectSingleNode("//meta[@name='keywords']");
                if (metaKeywords != null && metaKeywords.Attributes["content"] != null)
                {
                    sitePage.PageKeywords = metaKeywords.Attributes["content"].Value;
                }

                IndexContainer.IndexDocument(sitePage);

                // crawl all the links found on the page.
                foreach (string link in ParseLinks(htmlDoc))
                {
                    CrawlPage(link);
                }
            }
            catch
            {
                // Any exception is swallowed so that a failure on one page does not propagate to the caller.
                // NOTE(review): consider logging the exception here so crawl failures are visible.
            }
        }

Пример #2

Показать файл

Файл: Document.Logic.cs Проект: SparkDevNetwork/Rock

        /// <summary>
        /// Loads the document with the given identifier and sends it to the index.
        /// Does nothing when no document is found for the identifier.
        /// </summary>
        /// <param name="id">The identifier of the document to index.</param>
        public void IndexDocument(int id)
        {
            var documentEntity = new DocumentService(new RockContext()).Get(id);

            // nothing to index when the lookup did not return a document
            if (documentEntity == null)
            {
                return;
            }

            var indexItem = DocumentIndex.LoadByModel(documentEntity);

            IndexContainer.IndexDocument(indexItem);
        }

Пример #3

Показать файл

        /// <summary>
        /// Loads the group with the given identifier and sends it to the index, provided
        /// the group is active and its group type has indexing enabled.
        /// Does nothing when no group (or no group type) is found.
        /// </summary>
        /// <param name="id">The identifier of the group to index.</param>
        public void IndexDocument(int id)
        {
            var groupEntity = new GroupService(new RockContext()).Get(id);

            // nothing to index when the lookup did not return a group or its type is unavailable
            if (groupEntity == null || groupEntity.GroupType == null)
            {
                return;
            }

            // check that this group type is set to be indexed.
            if (groupEntity.GroupType.IsIndexEnabled && groupEntity.IsActive)
            {
                var indexItem = GroupIndex.LoadByModel(groupEntity);
                IndexContainer.IndexDocument(indexItem);
            }
        }

Пример #4

Показать файл

        /// <summary>
        /// Loads the event item with the given identifier and sends it to the index when
        /// at least one of its calendars has indexing enabled.
        /// </summary>
        /// <param name="id">The identifier.</param>
        public void IndexDocument(int id)
        {
            var rockContext = new RockContext();
            var eventItemService = new EventItemService(rockContext);
            var eventItem = eventItemService.Get(id);

            // no event item for this identifier, so there is nothing to index
            if (eventItem == null)
            {
                return;
            }

            // only event items that sit on an indexed calendar are sent to the index
            bool isOnIndexedCalendar = eventItem.EventCalendarItems.Any(calendarItem => calendarItem.EventCalendar.IsIndexEnabled);
            if (!isOnIndexedCalendar)
            {
                return;
            }

            IndexContainer.IndexDocument(EventItemIndex.LoadByModel(eventItem));
        }

Пример #5

Показать файл

Файл: ContentChannelItem.cs Проект: ewin66/rockrms

        /// <summary>
        /// Loads the content channel item with the given identifier and sends it to the index
        /// when its content channel has indexing enabled and the item is eligible: the channel
        /// does not require approval, the channel type disables status, or the item is approved.
        /// Does nothing when no item is found for the identifier.
        /// </summary>
        /// <param name="id">The identifier of the content channel item to index.</param>
        public void IndexDocument(int id)
        {
            var itemEntity = new ContentChannelItemService(new RockContext()).Get(id);

            // nothing to index when the lookup did not return an item
            if (itemEntity == null)
            {
                return;
            }

            // only index if the content channel is set to be indexed
            var contentChannel = itemEntity.ContentChannel;
            if (contentChannel == null || !contentChannel.IsIndexEnabled)
            {
                return;
            }

            // ensure it's meant to be indexed
            if (contentChannel.RequiresApproval == false || contentChannel.ContentChannelType.DisableStatus || itemEntity.Status == ContentChannelItemStatus.Approved)
            {
                var indexItem = ContentChannelItemIndex.LoadByModel(itemEntity);
                IndexContainer.IndexDocument(indexItem);
            }
        }

Пример #6

Показать файл

        /// <summary>
        /// Crawls a single page. The page is downloaded, indexed unless its robots meta tag
        /// contains "noindex" or identical content was already indexed, and its links are
        /// queued for crawling unless the robots meta tag contains "nofollow".
        /// </summary>
        /// <param name="url">The URL to crawl.</param>
        private void CrawlPage(string url)
        {
            try
            {
                // normalize the URL before doing anything else with it
                url = StandardizeUrl(url);

                if (PageHasBeenCrawled(url))
                {
                    return;
                }

                _previouslyCrawledPages.Add(url);

                // stay inside the base URL and respect the robots rules for the path relative to it
                if (!url.StartsWith(_baseUrl) || !_robotHelper.IsPathAllowed(_userAgent, url.Replace(_baseUrl, "")))
                {
                    return;
                }

                string pageHtml = GetWebText(url);
                if (string.IsNullOrWhiteSpace(pageHtml))
                {
                    return;
                }

                var document = new HtmlDocument();
                document.LoadHtml(pageHtml);

                HtmlNode robotsMeta = document.DocumentNode.SelectSingleNode("//meta[@name='robots']");

                // the page is indexed unless its robots meta tag says "noindex"
                bool mayIndex = robotsMeta == null || robotsMeta.Attributes["content"] == null || !robotsMeta.Attributes["content"].Value.Contains("noindex");
                if (mayIndex)
                {
                    var pageIndex = new SitePageIndex
                    {
                        Content = GetPageText(document),
                        Url = url,
                        Id = url.MakeInt64HashCode(),
                        SourceIndexModel = "Rock.Model.Site",
                        PageTitle = GetPageTitle(document, url)
                    };

                    pageIndex.DocumentName = pageIndex.PageTitle;
                    pageIndex.SiteName = _site.Name;
                    pageIndex.SiteId = _site.Id;
                    pageIndex.LastIndexedDateTime = RockDateTime.Now;

                    // summary comes from the description meta tag, when present
                    HtmlNode descriptionMeta = document.DocumentNode.SelectSingleNode("//meta[@name='description']");
                    if (descriptionMeta != null && descriptionMeta.Attributes["content"] != null)
                    {
                        pageIndex.PageSummary = descriptionMeta.Attributes["content"].Value;
                    }

                    // keywords come from the keywords meta tag, when present
                    HtmlNode keywordsMeta = document.DocumentNode.SelectSingleNode("//meta[@name='keywords']");
                    if (keywordsMeta != null && keywordsMeta.Attributes["content"] != null)
                    {
                        pageIndex.PageKeywords = keywordsMeta.Attributes["content"].Value;
                    }

                    // Hash the content and compare it against the hashes of pages indexed so far;
                    // only content not seen before is indexed, and its hash is then remembered.
                    long hashOfContent = pageIndex.Content.MakeInt64HashCode();

                    if (!_pageHashes.Contains(hashOfContent))
                    {
                        IndexContainer.IndexDocument(pageIndex);
                        _pageHashes.Add(hashOfContent);
                    }
                }

                // links are followed unless the robots meta tag says "nofollow"
                bool mayFollow = robotsMeta == null || robotsMeta.Attributes["content"] == null || !robotsMeta.Attributes["content"].Value.Contains("nofollow");
                if (mayFollow)
                {
                    // queue every link found on the page for crawling
                    foreach (string discoveredLink in ParseLinks(document))
                    {
                        _urlQueue.Enqueue(discoveredLink);
                    }
                }
            }
            catch { }
        }