Пример #1
0
        /// <summary>
        /// Loads the resource at <paramref name="uri"/> if it has not been seen before, reports the
        /// result through <c>NotifyResourceLoadingFinished</c> and, while the depth limit allows,
        /// starts crawling the links found in the loaded content.
        /// </summary>
        /// <param name="uri">Address of the resource to crawl.</param>
        /// <param name="currentDepth">
        /// Depth of the caller; it is incremented by one before being compared with <c>MaxDepth</c>.
        /// </param>
        /// <param name="parentId">Identifier forwarded to the loading-finished notification.</param>
        /// <returns>
        /// The output built for the resource, or <c>null</c> when <c>IsUriUnique</c> rejects the URI.
        /// </returns>
        /// <remarks>
        /// <c>ExtractPageLinks</c> is declared <c>async void</c> and is not awaited here, so child
        /// outputs may still be attached to the returned object after this task has completed.
        /// </remarks>
        public async Task<WebCrawlerOutput> PerformCrawlingAsync(Uri uri, int currentDepth, int parentId)
        {
            if (!IsUriUnique(uri))
            {
                return null;
            }

            // Register the URI BEFORE the first await. Link extraction runs fire-and-forget, so
            // several crawl chains interleave at every await; registering only after the load
            // finished let a second chain pass the uniqueness check for the same URI while the
            // first load was still pending, loading and registering the resource twice.
            // NOTE(review): presumably AddUri is what makes IsUriUnique return false afterwards - confirm.
            int uriId = AddUri(uri);

            // Load web resource
            var pageLoader = new WebPageLoader();
            var loadResult = await pageLoader.LoadAsync(uri);

            // Generate crawler output
            var output = new WebCrawlerOutput(uriId, uri, loadResult.Response, loadResult.Content);

            NotifyResourceLoadingFinished(parentId, output);

            // Children live one level deeper; stop descending once the limit is reached.
            currentDepth++;
            if (loadResult.Content != null && currentDepth < MaxDepth)
            {
                ExtractPageLinks(loadResult.Content, uri, output, currentDepth);
            }

            return output;
        }
Пример #2
0
 /// <summary>
 /// Raises <c>LoadingFinished</c> for a resource whose loading has completed.
 /// Does nothing when no handler is attached.
 /// </summary>
 /// <param name="parentId">Identifier passed to the handlers as the first argument.</param>
 /// <param name="output">Crawler output passed to the handlers as the second argument.</param>
 private void NotifyResourceLoadingFinished(int parentId, WebCrawlerOutput output)
 {
     // Snapshot the delegate first: checking the member for null and then invoking it reads it
     // twice, so a handler unsubscribed in between would cause a NullReferenceException.
     var handler = LoadingFinished;
     if (handler != null)
     {
         handler(parentId, output);
     }
 }
Пример #3
0
        /// <summary>
        /// Extracts the links contained in a loaded page, crawls them one after another and
        /// attaches every non-null result to <paramref name="output"/> as a child.
        /// </summary>
        /// <param name="content">Raw bytes of the loaded page.</param>
        /// <param name="parentUri">Address of the page the content was loaded from.</param>
        /// <param name="output">Output of the parent page; receives the crawled children.</param>
        /// <param name="depth">Depth value handed to each child crawl.</param>
        /// <remarks>
        /// The method is <c>async void</c>: callers cannot await it, and an exception thrown
        /// after the first await cannot be caught by the caller.
        /// </remarks>
        private async void ExtractPageLinks(byte[] content, Uri parentUri, WebCrawlerOutput output, int depth)
        {
            var linkExtractor = new LinkExtractor();
            var pageEncoding  = GetEncodingFromResponse(output.Response);
            var discovered    = linkExtractor.ExtractLinksFromPage(parentUri, content, pageEncoding);

            foreach (Uri childUri in discovered)
            {
                // Children are crawled sequentially; the parent id is looked up for each link.
                WebCrawlerOutput crawled = await PerformCrawlingAsync(childUri, depth, GetUriId(parentUri));
                if (crawled == null)
                {
                    // The crawl produced no output for this link.
                    continue;
                }

                output.AddChild(crawled);
            }
        }
Пример #4
0
 /// <summary>
 /// Appends <paramref name="child"/> to this output's <c>Children</c> collection.
 /// </summary>
 /// <param name="child">Crawler output to attach; it is added as-is, without any checks.</param>
 public void AddChild(WebCrawlerOutput child)
 {
     this.Children.Add(child);
 }