private static IWebCrawler GetCustomBehaviorUsingLambdaWebCrawler()
{
    IWebCrawler crawler = new PoliteWebCrawler();

    //Register a lambda expression that restricts the crawl to the treasury.gov hosts and skips pdf files
    crawler.ShouldCrawlPage((pageToCrawl, crawlContext) =>
    {
        return new CrawlDecision
        {
            Allow = (pageToCrawl.Uri.AbsoluteUri.StartsWith("https://home.treasury.gov") ||
                     pageToCrawl.Uri.AbsoluteUri.StartsWith("https://www.treasury.gov")) &&
                    !pageToCrawl.Uri.AbsoluteUri.EndsWith(".pdf")
        };
    });

    //Register a lambda expression that allows the content of every crawled page to be downloaded
    crawler.ShouldDownloadPageContent((crawledPage, crawlContext) =>
    {
        return new CrawlDecision { Allow = true };
    });

    //Register a lambda expression that allows links on every crawled page to be crawled.
    //NOTE: This lambda is run after the regular ICrawlDecisionMaker.ShouldCrawlPageLinks method is run
    crawler.ShouldCrawlPageLinks((crawledPage, crawlContext) =>
    {
        return new CrawlDecision { Allow = true };
    });

    return crawler;
}
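//A minimal sketch (assuming the synchronous Crawl API of Abot 1.x) showing how the crawler
//built above could be run; the start url, the method name RunTreasuryCrawl, and the console
//reporting are illustrative only.
private static void RunTreasuryCrawl()
{
    IWebCrawler crawler = GetCustomBehaviorUsingLambdaWebCrawler();

    //Crawl blocks until the crawl finishes or a configured limit (depth, page count, timeout) is reached
    CrawlResult result = crawler.Crawl(new Uri("https://home.treasury.gov"));

    if (result.ErrorOccurred)
        Console.WriteLine("Crawl of {0} completed with error: {1}", result.RootUri.AbsoluteUri, result.ErrorException.Message);
    else
        Console.WriteLine("Crawl of {0} completed without error.", result.RootUri.AbsoluteUri);
}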
public PoliteWebCrawler CreateCrawler()
{
    _dataFinder = new DataFinder(new KomputronikDataExtractor());

    //Configure log4net, then load the crawl configuration from app.config
    XmlConfigurator.Configure();
    CrawlConfiguration crawlConfig = AbotConfigurationSectionHandler.LoadFromXml().Convert();
    crawlConfig.MaxConcurrentThreads = 15; //this overrides the app.config value
    crawlConfig.MaxCrawlDepth = 15;

    //Pass the modified configuration in explicitly; the parameterless constructor reads
    //app.config directly, so the two overrides above would otherwise be silently ignored
    PoliteWebCrawler crawler = new PoliteWebCrawler(crawlConfig, null, null, null, null, null, null, null, null);

    crawler.ShouldCrawlPage(ShouldCrawlPage);
    crawler.ShouldDownloadPageContent(ShouldCrawlPageContent);
    crawler.ShouldCrawlPageLinks(ShouldCrawlPageLinks);

    crawler.PageCrawlCompletedAsync += crawler_ProcessPageCrawlCompleted;
    crawler.PageCrawlDisallowedAsync += crawler_PageCrawlDisallowed;
    crawler.PageLinksCrawlDisallowedAsync += crawler_PageLinksCrawlDisallowed;

    return crawler;
}
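//Hypothetical implementations of the three event handlers wired up in CreateCrawler; the
//original bodies are not shown, so this is a sketch based on the standard Abot 1.x event
//args (PageCrawlCompletedArgs, PageCrawlDisallowedArgs, PageLinksCrawlDisallowedArgs).
void crawler_ProcessPageCrawlCompleted(object sender, PageCrawlCompletedArgs e)
{
    CrawledPage crawledPage = e.CrawledPage;

    if (crawledPage.WebException != null || crawledPage.HttpWebResponse.StatusCode != HttpStatusCode.OK)
    {
        Console.WriteLine("Crawl of page failed {0}", crawledPage.Uri.AbsoluteUri);
        return;
    }

    //A cleanly crawled page is where _dataFinder would typically be handed the raw page
    //text for extraction (the DataFinder API itself is not shown in these snippets)
    if (string.IsNullOrEmpty(crawledPage.Content.Text))
        Console.WriteLine("Page had no content {0}", crawledPage.Uri.AbsoluteUri);
}

void crawler_PageCrawlDisallowed(object sender, PageCrawlDisallowedArgs e)
{
    Console.WriteLine("Did not crawl page {0} due to {1}", e.PageToCrawl.Uri.AbsoluteUri, e.DisallowedReason);
}

void crawler_PageLinksCrawlDisallowed(object sender, PageLinksCrawlDisallowedArgs e)
{
    Console.WriteLine("Did not crawl the links on page {0} due to {1}", e.CrawledPage.Uri.AbsoluteUri, e.DisallowedReason);
}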