コード例 #1
0
ファイル: CrawlScheduler.cs プロジェクト: vpetroff/NetCrawler
        /// <summary>
        /// Creates the scheduler: keeps references to the supplied collaborators and
        /// builds the <c>schedulingBlock</c> that post-processes every page crawl result.
        /// </summary>
        /// <param name="urlHasher">Stored in the <c>urlHasher</c> field.</param>
        /// <param name="configuration">Stored in the <c>configuration</c> field.</param>
        /// <param name="pageCrawler">Stored in the <c>pageCrawler</c> field.</param>
        /// <param name="crawlUrlRepository">Stored in the <c>crawlUrlRepository</c> field.</param>
        public CrawlScheduler(IUrlHasher urlHasher, IConfiguration configuration, ISinglePageCrawler pageCrawler, ICrawlUrlRepository crawlUrlRepository)
        {
            this.urlHasher = urlHasher;
            this.configuration = configuration;
            this.pageCrawler = pageCrawler;
            this.crawlUrlRepository = crawlUrlRepository;

            // Each result posted to this block runs through the steps below, in this order.
            schedulingBlock = new ActionBlock<PageCrawlResult>(crawlResult =>
            {
                var site = crawlResult.CrawlUrl.WebsiteDefinition;

                // Atomically count this page as processed, then notify listeners.
                Interlocked.Increment(ref site.ProcessedUrlsCount);
                RaisePageCrawled(crawlResult);

                // Links are handed to Schedule before the completion check below.
                // NOTE(review): presumably Schedule updates UrlsToProcessCount, which is why
                // the order matters - confirm against Schedule's implementation.
                if (crawlResult.Links.Any())
                {
                    Schedule(crawlResult.Links);
                }

                // Matching counters mean nothing is left pending for this website,
                // so its entry in websiteProcessingDefinitions is completed.
                var siteFinished = site.UrlsToProcessCount == site.ProcessedUrlsCount;
                if (siteFinished)
                {
                    websiteProcessingDefinitions[site].Complete();
                }

                ScheduleNext();
            });
        }
コード例 #2
0
ファイル: CrawlScheduler.cs プロジェクト: vpetroff/NetCrawler
        /// <summary>
        /// Initializes the scheduler with its collaborators and wires up the
        /// <c>schedulingBlock</c> action that handles each <c>PageCrawlResult</c>.
        /// </summary>
        /// <param name="urlHasher">Assigned to the <c>urlHasher</c> field.</param>
        /// <param name="configuration">Assigned to the <c>configuration</c> field.</param>
        /// <param name="pageCrawler">Assigned to the <c>pageCrawler</c> field.</param>
        /// <param name="crawlUrlRepository">Assigned to the <c>crawlUrlRepository</c> field.</param>
        public CrawlScheduler(IUrlHasher urlHasher, IConfiguration configuration, ISinglePageCrawler pageCrawler, ICrawlUrlRepository crawlUrlRepository)
        {
            this.urlHasher = urlHasher;
            this.configuration = configuration;
            this.pageCrawler = pageCrawler;
            this.crawlUrlRepository = crawlUrlRepository;

            schedulingBlock = new ActionBlock<PageCrawlResult>(crawled =>
            {
                var definition = crawled.CrawlUrl.WebsiteDefinition;

                // Bump the processed counter with an interlocked (atomic) increment.
                Interlocked.Increment(ref definition.ProcessedUrlsCount);

                // Notify before any follow-up scheduling happens.
                RaisePageCrawled(crawled);

                // Only call Schedule when the page actually produced links.
                var pageHasLinks = crawled.Links.Any();
                if (pageHasLinks)
                {
                    Schedule(crawled.Links);
                }

                // Compared after the links were scheduled: equal counters trigger
                // completion of this website's processing definition.
                if (definition.UrlsToProcessCount == definition.ProcessedUrlsCount)
                {
                    websiteProcessingDefinitions[definition].Complete();
                }

                // Always runs last, regardless of the branches above.
                ScheduleNext();
            });
        }