/// <summary>
/// Crawls the given URI if it has not been visited yet: loads the resource,
/// registers it, raises the loading-finished notification and — while still
/// below <c>MaxDepth</c> — kicks off extraction of the page's child links.
/// </summary>
/// <param name="uri">Resource to crawl.</param>
/// <param name="currentDepth">Depth of this resource in the crawl tree.</param>
/// <param name="parentId">Id of the resource on which this URI was discovered.</param>
/// <returns>The crawl result, or <c>null</c> when the URI was already visited.</returns>
public async Task<WebCrawlerOutput> PerformCrawlingAsync(Uri uri, int currentDepth, int parentId)
{
    // Guard clause: an already-crawled URI produces no new output.
    if (!IsUriUnique(uri))
    {
        return null;
    }

    // Load the web resource.
    var pageLoader = new WebPageLoader();
    var loadResult = await pageLoader.LoadAsync(uri);

    // Register the resource and build the crawler output for it.
    int uriId = AddUri(uri);
    var output = new WebCrawlerOutput(uriId, uri, loadResult.Response, loadResult.Content);

    NotifyResourceLoadingFinished(parentId, output);

    // NOTE(review): ExtractPageLinks is fire-and-forget here (declared async
    // void elsewhere in this file), so child crawling is not awaited and this
    // method can return before output's children are populated — confirm intended.
    currentDepth++;
    if (loadResult.Content != null && currentDepth < MaxDepth)
    {
        ExtractPageLinks(loadResult.Content, uri, output, currentDepth);
    }

    return output;
}
/// <summary>
/// Raises the <c>LoadingFinished</c> event for a newly crawled resource.
/// </summary>
/// <param name="parentId">Id of the resource on which the crawled URI was discovered.</param>
/// <param name="output">Result of crawling the resource.</param>
private void NotifyResourceLoadingFinished(int parentId, WebCrawlerOutput output)
{
    // Null-conditional invoke reads the delegate field exactly once, closing
    // the check-then-invoke race the explicit "!= null" test has when a
    // subscriber detaches between the check and the call.
    LoadingFinished?.Invoke(parentId, output);
}
/// <summary>
/// Extracts hyperlinks from a loaded page and recursively crawls each child
/// link, attaching every successful child result to <paramref name="output"/>.
/// </summary>
/// <param name="content">Raw response body of the parent page.</param>
/// <param name="parentUri">URI of the page the links were found on.</param>
/// <param name="output">Crawler output node that receives the crawled children.</param>
/// <param name="depth">Depth to assign to the child pages.</param>
/// <returns>A task that completes once all child links have been crawled.</returns>
private async Task ExtractPageLinks(byte[] content, Uri parentUri, WebCrawlerOutput output, int depth)
{
    // Was "async void": exceptions thrown during extraction or child crawling
    // would escape to the synchronization context (potentially crashing the
    // process) and completion could never be observed. Returning Task keeps the
    // existing un-awaited call site compiling while making the operation
    // awaitable and its failures observable.
    var extractor = new LinkExtractor();
    Encoding responseEncoding = GetEncodingFromResponse(output.Response);
    List<Uri> childUris = extractor.ExtractLinksFromPage(parentUri, content, responseEncoding);

    // Hoisted out of the loop: the parent's id is invariant across links.
    int parentUriId = GetUriId(parentUri);

    foreach (var link in childUris)
    {
        var childOutput = await PerformCrawlingAsync(link, depth, parentUriId);
        if (childOutput != null)
        {
            output.AddChild(childOutput);
        }
    }
}
/// <summary>Appends a crawled child resource to this node's children.</summary>
/// <param name="child">Child crawl result to attach.</param>
public void AddChild(WebCrawlerOutput child) => Children.Add(child);