コード例 #1
0
ファイル: CrawlScheduler.cs プロジェクト: vpetroff/NetCrawler
        /// <summary>
        /// Registers <paramref name="website"/> for crawling and returns the task that tracks its crawl.
        /// </summary>
        /// <param name="website">
        /// The website to schedule. Its <c>RootUrl</c> is normalized in place: everything from the
        /// first '#' on is dropped and trailing '/' characters are trimmed.
        /// </param>
        /// <returns>
        /// The completion-source task of the definition already registered for this website, if any;
        /// a cancelled task when <paramref name="website"/> is null or its <c>RootUrl</c> is blank;
        /// otherwise the completion-source task of the newly created processing definition.
        /// </returns>
        public Task<CrawlResult> Schedule(Website website)
        {
            WebsiteProcessingDefinition websiteProcessingDefinition;

            // The duplicate lookup and the registration must happen under the same lock.
            // With the lookup outside the lock, two concurrent callers could both miss the
            // existing entry and register the same website twice, and the lookup could
            // enumerate websiteDefinitions while another thread was adding to it.
            lock (websiteLock)
            {
                var existing = websiteDefinitions.FirstOrDefault(x => x.Website == website);

                if (existing != null)
                {
                    return websiteProcessingDefinitions[existing].CompletionSource.Task;
                }

                if (website == null || string.IsNullOrWhiteSpace(website.RootUrl))
                {
                    var cancelledTask = new TaskCompletionSource<CrawlResult>();
                    cancelledTask.SetCanceled();

                    return cancelledTask.Task;
                }

                // Strip the fragment and any trailing slashes from the root URL.
                website.RootUrl = website.RootUrl.Split('#')[0].TrimEnd('/');

                var processingBlock = CreateProcessingBlock(website);

                var websiteDefinition = new WebsiteDefinition
                {
                    Website     = website,
                    CrawlResult = new CrawlResult(),
                };

                websiteProcessingDefinition = new WebsiteProcessingDefinition(websiteDefinition)
                {
                    ProcessingBlock  = processingBlock,
                    CompletionSource = new TaskCompletionSource<CrawlResult>()
                };

                // Only list the definition when the processing map accepted it, so the
                // list and the map stay in step.
                if (websiteProcessingDefinitions.TryAdd(websiteDefinition, websiteProcessingDefinition))
                {
                    websiteDefinitions.Add(websiteDefinition);
                }
            }

            // Notification and link scheduling run outside the lock, as before.
            RaiseWebsiteScheduled(website);

            var outstandingLinks = Schedule(new[] { website.RootUrl });

            if (outstandingLinks > 0)
            {
                ScheduleNext();
            }

            return websiteProcessingDefinition.CompletionSource.Task;
        }
コード例 #2
0
 /// <summary>
 /// Creates a processing definition bound to the given website definition.
 /// </summary>
 /// <param name="websiteDefinition">The definition stored in <c>WebsiteDefinition</c>.</param>
 public WebsiteProcessingDefinition(WebsiteDefinition websiteDefinition)
 {
     this.WebsiteDefinition = websiteDefinition;
 }
コード例 #3
0
 /// <summary>
 /// Initializes the instance by storing <paramref name="websiteDefinition"/> in the
 /// <c>WebsiteDefinition</c> member.
 /// </summary>
 /// <param name="websiteDefinition">The website definition this instance refers to.</param>
 public WebsiteProcessingDefinition(WebsiteDefinition websiteDefinition)
 {
     this.WebsiteDefinition = websiteDefinition;
 }
コード例 #4
0
ファイル: CrawlScheduler.cs プロジェクト: vpetroff/NetCrawler
        /// <summary>
        /// Schedules a website for crawling, or returns the task of its existing registration.
        /// </summary>
        /// <param name="website">
        /// The website to crawl. Its <c>RootUrl</c> is rewritten in place with the '#' fragment
        /// and trailing '/' characters removed.
        /// </param>
        /// <returns>
        /// The task of the already-registered definition when this website was scheduled before;
        /// a cancelled task when <paramref name="website"/> is null or has a blank <c>RootUrl</c>;
        /// otherwise the task of the newly registered processing definition.
        /// </returns>
        public Task<CrawlResult> Schedule(Website website)
        {
            WebsiteProcessingDefinition websiteProcessingDefinition;

            // Check-then-register is done atomically under websiteLock. Looking up the
            // existing entry before taking the lock let concurrent callers register the
            // same website twice and read websiteDefinitions while it was being modified.
            lock (websiteLock)
            {
                var existing = websiteDefinitions.FirstOrDefault(x => x.Website == website);
                if (existing != null)
                    return websiteProcessingDefinitions[existing].CompletionSource.Task;

                if (website == null || string.IsNullOrWhiteSpace(website.RootUrl))
                {
                    var cancelledTask = new TaskCompletionSource<CrawlResult>();
                    cancelledTask.SetCanceled();

                    return cancelledTask.Task;
                }

                // Drop the fragment part and trailing slashes of the root URL.
                website.RootUrl = website.RootUrl.Split('#')[0].TrimEnd('/');

                var processingBlock = CreateProcessingBlock(website);

                var websiteDefinition = new WebsiteDefinition
                    {
                        Website = website,
                        CrawlResult = new CrawlResult(),
                    };

                websiteProcessingDefinition = new WebsiteProcessingDefinition(websiteDefinition)
                    {
                        ProcessingBlock = processingBlock,
                        CompletionSource = new TaskCompletionSource<CrawlResult>()
                    };

                // The definition is listed only if the processing map took the new entry.
                if (websiteProcessingDefinitions.TryAdd(websiteDefinition, websiteProcessingDefinition))
                {
                    websiteDefinitions.Add(websiteDefinition);
                }
            }

            // Event notification and link scheduling stay outside the lock, as in the original.
            RaiseWebsiteScheduled(website);

            var outstandingLinks = Schedule(new[] { website.RootUrl });
            if (outstandingLinks > 0)
                ScheduleNext();

            return websiteProcessingDefinition.CompletionSource.Task;
        }