/// <summary>
/// Schedules a website for crawling and returns the task that exposes its crawl result.
/// </summary>
/// <param name="website">
/// The website to schedule. Its <c>RootUrl</c> is rewritten in place: everything from the
/// first '#' onwards is dropped and trailing '/' characters are trimmed.
/// </param>
/// <returns>
/// The task of the already registered definition when one exists whose <c>Website</c>
/// compares equal (<c>==</c>) to <paramref name="website"/>; a cancelled task when
/// <paramref name="website"/> is null or its <c>RootUrl</c> is null/blank; otherwise the
/// task of the completion source created for the new registration.
/// </returns>
public Task<CrawlResult> Schedule(Website website)
{
    WebsiteProcessingDefinition websiteProcessingDefinition;

    // The duplicate lookup and the registration share one critical section. Doing the
    // lookup outside the lock (as before) let two concurrent callers both miss it and
    // register the same website twice, and enumerated websiteDefinitions while another
    // thread could be adding to it.
    lock (websiteLock)
    {
        var existing = websiteDefinitions.FirstOrDefault(x => x.Website == website);
        if (existing != null)
        {
            return websiteProcessingDefinitions[existing].CompletionSource.Task;
        }

        if (website == null || string.IsNullOrWhiteSpace(website.RootUrl))
        {
            // Nothing can be crawled: hand back a task that is already cancelled.
            var cancelledTask = new TaskCompletionSource<CrawlResult>();
            cancelledTask.SetCanceled();
            return cancelledTask.Task;
        }

        // Normalize the root URL: strip the fragment, then any trailing slashes.
        website.RootUrl = website.RootUrl.Split('#')[0].TrimEnd('/');

        var processingBlock = CreateProcessingBlock(website);
        var websiteDefinition = new WebsiteDefinition
        {
            Website = website,
            CrawlResult = new CrawlResult(),
        };

        websiteProcessingDefinition = new WebsiteProcessingDefinition(websiteDefinition)
        {
            ProcessingBlock = processingBlock,
            CompletionSource = new TaskCompletionSource<CrawlResult>()
        };

        // NOTE(review): when TryAdd fails the definition is not registered, yet the method
        // still goes on to raise the event and return this completion source's task.
        // Confirm whether that path is reachable and who would complete the task.
        if (websiteProcessingDefinitions.TryAdd(websiteDefinition, websiteProcessingDefinition))
        {
            websiteDefinitions.Add(websiteDefinition);
        }
    }

    // Notification and link scheduling stay outside the lock, as in the original.
    RaiseWebsiteScheduled(website);

    var outstandingLinks = Schedule(new[] { website.RootUrl });
    if (outstandingLinks > 0)
    {
        ScheduleNext();
    }

    return websiteProcessingDefinition.CompletionSource.Task;
}
/// <summary>
/// Creates a processing definition bound to the given website definition.
/// </summary>
/// <param name="websiteDefinition">
/// The value stored, as passed, in the <c>WebsiteDefinition</c> member.
/// </param>
public WebsiteProcessingDefinition(WebsiteDefinition websiteDefinition)
{
    this.WebsiteDefinition = websiteDefinition;
}
/// <summary>
/// Schedules a website for crawling and returns the task that exposes its crawl result.
/// </summary>
/// <param name="website">
/// The website to schedule. Its <c>RootUrl</c> is rewritten in place: everything from the
/// first '#' onwards is dropped and trailing '/' characters are trimmed.
/// </param>
/// <returns>
/// The task of the already registered definition when one exists whose <c>Website</c>
/// compares equal (<c>==</c>) to <paramref name="website"/>; a cancelled task when
/// <paramref name="website"/> is null or its <c>RootUrl</c> is null/blank; otherwise the
/// task of the completion source created for the new registration.
/// </returns>
public Task<CrawlResult> Schedule(Website website)
{
    WebsiteProcessingDefinition websiteProcessingDefinition;

    // The duplicate lookup and the registration share one critical section. Doing the
    // lookup outside the lock (as before) let two concurrent callers both miss it and
    // register the same website twice, and enumerated websiteDefinitions while another
    // thread could be adding to it.
    lock (websiteLock)
    {
        var existing = websiteDefinitions.FirstOrDefault(x => x.Website == website);
        if (existing != null)
        {
            return websiteProcessingDefinitions[existing].CompletionSource.Task;
        }

        if (website == null || string.IsNullOrWhiteSpace(website.RootUrl))
        {
            // Nothing can be crawled: hand back a task that is already cancelled.
            var cancelledTask = new TaskCompletionSource<CrawlResult>();
            cancelledTask.SetCanceled();
            return cancelledTask.Task;
        }

        // Normalize the root URL: strip the fragment, then any trailing slashes.
        website.RootUrl = website.RootUrl.Split('#')[0].TrimEnd('/');

        var processingBlock = CreateProcessingBlock(website);
        var websiteDefinition = new WebsiteDefinition
        {
            Website = website,
            CrawlResult = new CrawlResult(),
        };

        websiteProcessingDefinition = new WebsiteProcessingDefinition(websiteDefinition)
        {
            ProcessingBlock = processingBlock,
            CompletionSource = new TaskCompletionSource<CrawlResult>()
        };

        // NOTE(review): when TryAdd fails the definition is not registered, yet the method
        // still goes on to raise the event and return this completion source's task.
        // Confirm whether that path is reachable and who would complete the task.
        if (websiteProcessingDefinitions.TryAdd(websiteDefinition, websiteProcessingDefinition))
        {
            websiteDefinitions.Add(websiteDefinition);
        }
    }

    // Notification and link scheduling stay outside the lock, as in the original.
    RaiseWebsiteScheduled(website);

    var outstandingLinks = Schedule(new[] { website.RootUrl });
    if (outstandingLinks > 0)
    {
        ScheduleNext();
    }

    return websiteProcessingDefinition.CompletionSource.Task;
}