Example #1
0
        /// <summary>
        /// Runs the nested crawl for the supplied root URLs and wraps its outcome in a single
        /// top-level result.
        /// </summary>
        /// <param name="rootUrls">URLs handed to <c>GetNestedCrawlResultsAsync</c> together with <c>FirstDepthLevel</c>.</param>
        /// <returns>
        /// A <see cref="CrawlResult"/> constructed from an empty string whose <c>NestedResults</c>
        /// hold whatever the nested crawl returned.
        /// </returns>
        public async Task <CrawlResult> PerformCrawlingAsync(string[] rootUrls)
        {
            // The container is constructed before the nested crawl is started and awaited.
            var root = new CrawlResult(string.Empty);
            root.NestedResults = await GetNestedCrawlResultsAsync(rootUrls, FirstDepthLevel);

            return root;
        }
Example #2
0
        /// <summary>
        /// Builds the result node for <paramref name="rootUrl"/> and, while the depth limit has not
        /// been exceeded, fills in its nested results.
        /// </summary>
        /// <param name="rootUrl">URL the result node is constructed from and that is passed on to the nested crawl.</param>
        /// <param name="depthLevel">Depth of this node; compared against <c>_maxDepthLevel</c>.</param>
        /// <returns>
        /// The node for <paramref name="rootUrl"/>; its <c>NestedResults</c> are only assigned when
        /// <paramref name="depthLevel"/> does not exceed <c>_maxDepthLevel</c>.
        /// </returns>
        private async Task <CrawlResult> GetCrawlResultAsync(string rootUrl, int depthLevel)
        {
            var node = new CrawlResult(rootUrl);

            // Past the depth limit the node is returned as constructed, without descending further.
            if (depthLevel > _maxDepthLevel)
            {
                return node;
            }

            node.NestedResults = await GetNestedCrawlResultsAsync(rootUrl, depthLevel);

            return node;
        }
Example #3
0
        /// <summary>
        /// Fetches the page at <paramref name="url"/>, extracts the URLs found in it and crawls each
        /// of them recursively one nesting level deeper.
        /// </summary>
        /// <param name="url">Address of the page to fetch and scan for further URLs.</param>
        /// <param name="nesting">Nesting level of this call; compared against <c>maxNesting</c>.</param>
        /// <returns>
        /// <c>null</c> when <paramref name="nesting"/> is greater than <c>maxNesting</c>; a freshly
        /// constructed <see cref="CrawlResult"/> with no entries added when it equals
        /// <c>maxNesting</c>; otherwise a <see cref="CrawlResult"/> keyed by the URLs found on the page.
        /// </returns>
        private async Task <CrawlResult> CrawlUrl(string url, int nesting)
        {
            // Beyond the configured depth nothing is crawled at all.
            if (nesting > maxNesting)
            {
                return null;
            }

            var urlsCrawlResult = new CrawlResult();

            // At exactly the maximum depth the page itself is not fetched or scanned.
            if (nesting >= maxNesting)
            {
                return urlsCrawlResult;
            }

            // The fetch is deliberately left outside the try block below: an exception raised
            // while downloading the page propagates to the caller rather than being logged here.
            string pageHtml = await HtmlUrlSearcher.Instance.GetPageHtml(url);

            try
            {
                foreach (string pageUrl in HtmlUrlSearcher.Instance.GetPageUrls(pageHtml, url))
                {
                    try
                    {
                        urlsCrawlResult[pageUrl] = await CrawlUrl(pageUrl, nesting + 1);
                    }
                    catch (Exception e)
                    {
                        // One failing link must not abort the remaining links of this page.
                        logger.Error(pageUrl + ": ", e);

                        // Only an entry that already exists for this URL is reset; no new entry
                        // is created for the failed URL.
                        if (urlsCrawlResult.Keys.Contains(pageUrl))
                        {
                            urlsCrawlResult[pageUrl] = null;
                        }
                    }
                }
            }
            catch (Exception e)
            {
                // Failures while extracting or enumerating the page's URLs are logged; whatever
                // was collected so far is still returned.
                logger.Error(url + ": ", e);
            }

            return urlsCrawlResult;
        }
Example #4
0
        /// <summary>
        /// Crawls every non-null entry of <paramref name="rootUrls"/> starting at nesting level 0
        /// and collects the outcomes in one <see cref="CrawlResult"/> keyed by root URL.
        /// </summary>
        /// <param name="rootUrls">
        /// Root URLs to crawl. Null entries are skipped. A null or empty array is reported as an
        /// invalid configuration instead of being crawled.
        /// </param>
        /// <returns>
        /// A <see cref="CrawlResult"/> holding one entry per crawled root URL, or a single
        /// <c>"--invalid urls config info--"</c> entry with a null value when there is nothing to crawl.
        /// </returns>
        public async Task <CrawlResult> PerformCrawlingAsync(string[] rootUrls)
        {
            CrawlResult crawlResult = new CrawlResult();

            // A missing array is treated like an empty one rather than failing with a
            // NullReferenceException on the Length access.
            if (rootUrls == null || rootUrls.Length == 0)
            {
                crawlResult["--invalid urls config info--"] = null;
                logger.Warn("\nWARN: --invalid urls config info--\n");

                return crawlResult;
            }

            // Roots are crawled one after another; each await completes before the next starts.
            foreach (string rootUrl in rootUrls)
            {
                if (rootUrl != null)
                {
                    crawlResult[rootUrl] = await CrawlUrl(rootUrl, 0);
                }
            }

            return crawlResult;
        }