/// <summary>
/// Crawls the supplied root URLs and wraps the outcome in a single top-level result.
/// </summary>
/// <param name="rootUrls">URLs handed to <c>GetNestedCrawlResultsAsync</c> together with <c>FirstDepthLevel</c>.</param>
/// <returns>
/// A <c>CrawlResult</c> constructed with an empty string whose <c>NestedResults</c>
/// holds the awaited nested crawl results.
/// </returns>
public async Task<CrawlResult> PerformCrawlingAsync(string[] rootUrls)
{
    // The top-level node is created first, then populated once the nested crawl completes.
    var topLevel = new CrawlResult(string.Empty);
    topLevel.NestedResults = await GetNestedCrawlResultsAsync(rootUrls, FirstDepthLevel);
    return topLevel;
}
/// <summary>
/// Builds the crawl result for one URL, attaching nested results only while the depth limit allows it.
/// </summary>
/// <param name="rootUrl">URL the result is constructed for and that is passed on to the nested crawl.</param>
/// <param name="depthLevel">Depth level compared against <c>_maxDepthLevel</c>.</param>
/// <returns>
/// A <c>CrawlResult</c> for <paramref name="rootUrl"/>; its <c>NestedResults</c> is assigned only when
/// <paramref name="depthLevel"/> does not exceed <c>_maxDepthLevel</c>.
/// </returns>
private async Task<CrawlResult> GetCrawlResultAsync(string rootUrl, int depthLevel)
{
    var node = new CrawlResult(rootUrl);

    // Past the depth limit: return the node without touching NestedResults.
    if (depthLevel > _maxDepthLevel)
    {
        return node;
    }

    node.NestedResults = await GetNestedCrawlResultsAsync(rootUrl, depthLevel);
    return node;
}
/// <summary>
/// Recursively crawls <paramref name="url"/>: fetches its HTML, extracts the URLs found on the page,
/// and stores the crawl result of each of them under its URL.
/// </summary>
/// <param name="url">Address of the page to fetch and scan for URLs.</param>
/// <param name="nesting">Nesting level of this call; each recursive call passes <c>nesting + 1</c>.</param>
/// <returns>
/// <c>null</c> when <paramref name="nesting"/> is greater than <c>maxNesting</c>; a newly constructed
/// result with no entries added when it equals <c>maxNesting</c>; otherwise a result with one entry
/// per successfully crawled page URL.
/// </returns>
/// <remarks>
/// Failures while extracting URLs or crawling a child URL are logged and swallowed. A failure while
/// fetching the page HTML is not caught here and propagates to the caller.
/// </remarks>
private async Task<CrawlResult> CrawlUrl(string url, int nesting)
{
    if (nesting > maxNesting)
    {
        return null;
    }

    var urlsCrawlResult = new CrawlResult();

    // At the deepest allowed level the page is recorded but not fetched or expanded.
    if (nesting >= maxNesting)
    {
        return urlsCrawlResult;
    }

    // Intentionally left outside the try block so the exception behavior seen by callers is unchanged.
    string pageHtml = await HtmlUrlSearcher.Instance.GetPageHtml(url);

    try
    {
        foreach (string pageUrl in HtmlUrlSearcher.Instance.GetPageUrls(pageHtml, url))
        {
            try
            {
                urlsCrawlResult[pageUrl] = await CrawlUrl(pageUrl, nesting + 1);
            }
            catch (Exception e)
            {
                logger.Error(pageUrl + ": ", e);

                // Only overwrite an entry that already exists (e.g. the same URL was stored earlier);
                // a failed URL that was never stored stays absent from the result.
                if (urlsCrawlResult.Keys.Contains(pageUrl))
                {
                    urlsCrawlResult[pageUrl] = null;
                }
            }
        }
    }
    catch (Exception e)
    {
        // Covers failures raised while extracting or enumerating the page's URLs.
        logger.Error(url + ": ", e);
    }

    return urlsCrawlResult;
}
/// <summary>
/// Crawls every non-null root URL starting at nesting level 0 and collects the results keyed by URL.
/// </summary>
/// <param name="rootUrls">
/// Root URLs to crawl. A <c>null</c> or empty array is treated as invalid configuration;
/// <c>null</c> elements are skipped.
/// </param>
/// <returns>
/// A <c>CrawlResult</c> with one entry per crawled root URL, or a single
/// <c>"--invalid urls config info--"</c> entry mapped to <c>null</c> when no URLs were supplied.
/// </returns>
public async Task<CrawlResult> PerformCrawlingAsync(string[] rootUrls)
{
    var crawlResult = new CrawlResult();

    // A null array is handled like an empty one instead of failing with a NullReferenceException.
    if (rootUrls == null || rootUrls.Length == 0)
    {
        crawlResult["--invalid urls config info--"] = null;
        logger.Warn("\nWARN: --invalid urls config info--\n");
        return crawlResult;
    }

    foreach (string rootUrl in rootUrls)
    {
        if (rootUrl == null)
        {
            continue;
        }

        crawlResult[rootUrl] = await CrawlUrl(rootUrl, 0);
    }

    return crawlResult;
}