Example No. 1
        /// <summary>
        /// Crawls a page.
        /// </summary>
        /// <param name="url">The URL to crawl.</param>
        private void CrawlPage(string url)
        {
            // clean up the URL a bit
            url = StandardizeUrl(url);

            try
            {
                if (!PageHasBeenCrawled(url) && _robotHelper.IsPathAllowed(_userAgent, url) && url.StartsWith(_baseUrl))
                {
                    string rawPage = GetWebText(url);

                    if (!string.IsNullOrWhiteSpace(rawPage))
                    {
                        var htmlDoc = new HtmlDocument();
                        htmlDoc.LoadHtml(rawPage);

                        // ensure the page should be indexed by looking at the robots meta tag and Rock conventions
                        HtmlNode metaRobot = htmlDoc.DocumentNode.SelectSingleNode("//meta[@name='robots']");
                        if (metaRobot == null || metaRobot.Attributes["content"] == null || !metaRobot.Attributes["content"].Value.Contains("noindex"))
                        {
                            _previouslyCrawledPages.Add(url);

                            // index the page
                            SitePageIndex sitePage = new SitePageIndex();

                            sitePage.Content             = GetPageText(htmlDoc);
                            sitePage.Url                 = url;
                            sitePage.Id                  = url.MakeInt64HashCode();
                            sitePage.SourceIndexModel    = "Rock.Model.Site";
                            sitePage.PageTitle           = GetPageTitle(htmlDoc, url);
                            sitePage.DocumentName        = sitePage.PageTitle;
                            sitePage.SiteName            = _site.Name;
                            sitePage.SiteId              = _site.Id;
                            sitePage.LastIndexedDateTime = RockDateTime.Now;

                            HtmlNode metaDescription = htmlDoc.DocumentNode.SelectSingleNode("//meta[@name='description']");
                            if (metaDescription != null && metaDescription.Attributes["content"] != null)
                            {
                                sitePage.PageSummary = metaDescription.Attributes["content"].Value;
                            }

                            HtmlNode metaKeywords = htmlDoc.DocumentNode.SelectSingleNode("//meta[@name='keywords']");
                            if (metaKeywords != null && metaKeywords.Attributes["content"] != null)
                            {
                                sitePage.PageKeywords = metaKeywords.Attributes["content"].Value;
                            }

                            IndexContainer.IndexDocument(sitePage);

                            // crawl all the links found on the page.
                            foreach (string link in ParseLinks(htmlDoc))
                            {
                                CrawlPage(link);
                            }
                        }
                    }
                }
            }
            catch { /* swallow per-page errors so a single bad page doesn't stop the crawl */ }
        }
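
This first version follows every link it finds by recursing directly into CrawlPage, and it only marks a URL as crawled after the robots check has passed. Both versions normalize the incoming URL with StandardizeUrl before checking for duplicates, but that helper is not shown on this page. Below is a minimal sketch of what such a normalizer might do, assuming it only needs to drop fragments, canonicalize the host, and trim trailing slashes so equivalent URLs compare equal; the real helper may apply different rules.

        /// <summary>
        /// Hypothetical sketch of a URL normalizer; the real StandardizeUrl used above is not shown here and may apply different rules.
        /// </summary>
        private string StandardizeUrl(string url)
        {
            if (string.IsNullOrWhiteSpace(url))
            {
                return url;
            }

            // drop any fragment so /page#section and /page are treated as the same document
            int hashIndex = url.IndexOf('#');
            if (hashIndex >= 0)
            {
                url = url.Substring(0, hashIndex);
            }

            // let Uri canonicalize the scheme and host casing when the URL is absolute
            if (Uri.TryCreate(url, UriKind.Absolute, out Uri uri))
            {
                url = uri.GetLeftPart(UriPartial.Query);
            }

            // treat /page and /page/ as the same page
            return url.TrimEnd('/');
        }

With rules like these, http://Example.com/page/#top and http://example.com/page reduce to the same string, so PageHasBeenCrawled can rely on an exact comparison.
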
Example No. 2
        /// <summary>
        /// Crawls a page.
        /// </summary>
        /// <param name="url">The URL to crawl.</param>
        private void CrawlPage(string url)
        {
            try
            {
                // clean up the URL a bit
                url = StandardizeUrl(url);

                if (!PageHasBeenCrawled(url))
                {
                    _previouslyCrawledPages.Add(url);

                    if (url.StartsWith(_baseUrl) && _robotHelper.IsPathAllowed(_userAgent, url.Replace(_baseUrl, "")))
                    {
                        string rawPage = GetWebText(url);

                        if (!string.IsNullOrWhiteSpace(rawPage))
                        {
                            var htmlDoc = new HtmlDocument();
                            htmlDoc.LoadHtml(rawPage);

                            // ensure the page should be indexed by looking at the robots meta tag and Rock conventions
                            HtmlNode metaRobot = htmlDoc.DocumentNode.SelectSingleNode("//meta[@name='robots']");
                            if (metaRobot == null || metaRobot.Attributes["content"] == null || !metaRobot.Attributes["content"].Value.Contains("noindex"))
                            {
                                // index the page
                                SitePageIndex sitePage = new SitePageIndex();

                                sitePage.Content             = GetPageText(htmlDoc);
                                sitePage.Url                 = url;
                                sitePage.Id                  = url.MakeInt64HashCode();
                                sitePage.SourceIndexModel    = "Rock.Model.Site";
                                sitePage.PageTitle           = GetPageTitle(htmlDoc, url);
                                sitePage.DocumentName        = sitePage.PageTitle;
                                sitePage.SiteName            = _site.Name;
                                sitePage.SiteId              = _site.Id;
                                sitePage.LastIndexedDateTime = RockDateTime.Now;

                                HtmlNode metaDescription = htmlDoc.DocumentNode.SelectSingleNode("//meta[@name='description']");
                                if (metaDescription != null && metaDescription.Attributes["content"] != null)
                                {
                                    sitePage.PageSummary = metaDescription.Attributes["content"].Value;
                                }

                                HtmlNode metaKeywords = htmlDoc.DocumentNode.SelectSingleNode("//meta[@name='keywords']");
                                if (metaKeywords != null && metaKeywords.Attributes["content"] != null)
                                {
                                    sitePage.PageKeywords = metaKeywords.Attributes["content"].Value;
                                }

                                // Hash the content and check it against the list of already-indexed pages; only index the page (and record its hash) if the content hasn't been seen before.
                                long contentHash = sitePage.Content.MakeInt64HashCode();

                                if (!_pageHashes.Contains(contentHash))
                                {
                                    IndexContainer.IndexDocument(sitePage);
                                    _pageHashes.Add(contentHash);
                                }
                            }

                            if (metaRobot == null || metaRobot.Attributes["content"] == null || !metaRobot.Attributes["content"].Value.Contains("nofollow"))
                            {
                                // crawl all the links found on the page.
                                var links = ParseLinks(htmlDoc);

                                foreach (string link in links)
                                {
                                    _urlQueue.Enqueue(link);
                                }
                            }
                        }
                    }
                }
            }
            catch { /* swallow per-page errors so a single bad page doesn't stop the crawl */ }
        }
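
This second version differs from the first in a few ways: the URL is recorded as crawled before it is fetched, the robots.txt check runs against the path relative to _baseUrl, a content hash stored in _pageHashes prevents the same content from being indexed twice under different URLs, the nofollow directive is honored separately from noindex, and discovered links are enqueued on _urlQueue instead of being crawled recursively. The queue implies an outer loop that drains it; here is a minimal sketch of such a driver, assuming _urlQueue is a Queue<string> and the fields used above already exist (the actual entry point in Rock's crawler may look different).

        /// <summary>
        /// Hypothetical driver for the queue-based crawl above; the method name and signature are assumptions, not Rock's actual entry point.
        /// </summary>
        private void CrawlSite(string startUrl)
        {
            _urlQueue.Enqueue(startUrl);

            // drain the queue breadth-first; CrawlPage enqueues any new links it discovers
            while (_urlQueue.Count > 0)
            {
                CrawlPage(_urlQueue.Dequeue());
            }
        }

Driving the crawl from a queue keeps the call stack shallow no matter how deeply pages link to one another, which is the main practical gain over the recursive version.
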