// Exemplo n.º 1
// 0
        private void ConfigureWebCrawler()
        {
            // Build the Abot crawl configuration. Per Abot conventions, a value
            // of 0 on the Max*/timeout settings means "no limit".
            var config = new CrawlConfiguration
            {
                // Crawl throughput and scope limits
                MaxConcurrentThreads                       = Environment.ProcessorCount,
                MaxPagesToCrawl                            = 0,
                MaxPagesToCrawlPerDomain                   = 0,
                MaxPageSizeInBytes                         = 0,
                UserAgentString                            = "Mozilla/5.0 (Windows NT 6.3; Trident/7.0; rv:11.0) like Gecko",
                HttpProtocolVersion                        = HttpProtocolVersion.NotSpecified,
                CrawlTimeoutSeconds                        = 0,
                IsUriRecrawlingEnabled                     = false,
                IsExternalPageCrawlingEnabled              = false,
                IsExternalPageLinksCrawlingEnabled         = false,
                IsRespectUrlNamedAnchorOrHashbangEnabled   = false,
                DownloadableContentTypes                   = "text/html, text/plain",

                // HTTP transport behavior
                HttpServicePointConnectionLimit            = 200,
                HttpRequestTimeoutInSeconds                = 15,
                HttpRequestMaxAutoRedirects                = 7,
                IsHttpRequestAutoRedirectsEnabled          = true,
                IsHttpRequestAutomaticDecompressionEnabled = true,
                IsSendingCookiesEnabled                    = false,
                // NOTE(review): SSL certificate validation is deliberately disabled
                // here; confirm this is acceptable for the sites being crawled.
                IsSslCertificateValidationEnabled          = false,

                // Memory guards (0 = disabled)
                MinAvailableMemoryRequiredInMb             = 0,
                MaxMemoryUsageInMb                         = 0,
                MaxMemoryUsageCacheTimeInSeconds           = 0,

                // Link extraction and retry behavior
                MaxCrawlDepth                              = 1000,
                MaxLinksPerPage                            = 1000,
                IsForcedLinkParsingEnabled                 = false,
                MaxRetryCount                              = 0,
                MinRetryDelayInMilliseconds                = 0,

                // Politeness: honor robots.txt and the various no-follow signals.
                IsRespectRobotsDotTextEnabled                = true,
                UrlPatternsToExclude                         = ExtractorParams.UrlPatternsToExclude,
                IsRespectMetaRobotsNoFollowEnabled           = true,
                IsRespectHttpXRobotsTagHeaderNoFollowEnabled = true,
                IsRespectAnchorRelNoFollowEnabled            = true,
                IsIgnoreRobotsDotTextIfRootDisallowedEnabled = false,
                RobotsDotTextUserAgentString                 = "bingbot",
                MinCrawlDelayPerDomainMilliSeconds           = ExtractorParams.MinCrawlDelay,
                MaxRobotsDotTextCrawlDelayInSeconds          = 5,

                // No authentication is used for the crawl.
                IsAlwaysLogin         = false,
                LoginUser             = "",
                LoginPassword         = "",
                UseDefaultCredentials = false
            };

            if (DoContinue)
            {
                // Resuming: restore the scheduler state from the checkpoint file
                // written by a previous run.
                string checkpointPath = Path.Combine(ContentDirectory.FullName, LogsDirName, CheckpointFileName);
                using (FileStream checkpointStream = new FileStream(checkpointPath, FileMode.Open))
                {
                    scheduler = Scheduler.Deserialize(checkpointStream);
                }
            }
            else
            {
                // Fresh crawl: start with an empty scheduler.
                scheduler = new Scheduler(config.IsUriRecrawlingEnabled, null, null);
            }

            // Wire up the crawler with our scheduler and scoping/link-filtering
            // callbacks; nulls let Abot use its default implementations.
            crawler = new PoliteWebCrawler(config, null, null, scheduler, null, null, null, null, null);
            crawler.IsInternalUri((candidateUri, rootUri) => HtmlFileUtils.ShouldCrawlUri(ExtractorParams.Scope, candidateUri, rootUri));
            crawler.ShouldCrawlPageLinks(WebCrawler_ShouldCrawlPageLinks);
            crawler.PageCrawlCompletedAsync += WebCrawler_PageCrawlCompletedAsync;

            // DEBUG: uncomment to debug Abot crawl progress
            // crawler.PageCrawlStartingAsync += WebCrawler_PageCrawlStartingAsync;

            // DEBUG: uncomment to debug Abot crawling decisions
            // crawler.PageCrawlDisallowedAsync += WebCrawler_PageCrawlDisallowedAsync;
            // crawler.PageLinksCrawlDisallowedAsync += WebCrawler_PageLinksCrawlDisallowedAsync;
        }