Esempio n. 1
0
        /// <summary>
        /// Crawls pages starting from the URL that <c>VerifyUrlIntegrity</c> returns for
        /// <paramref name="hostname"/>, and keeps going until <c>LinksToCrawl</c> is empty.
        /// </summary>
        /// <remarks>
        /// Links are taken from <c>LinksToCrawl</c> in batches of at most <c>ConcurrencyLimit</c>
        /// entries. Every batch is awaited in full before the next one is started, so links
        /// enqueued by <c>CrawlPage</c> are picked up by a later batch.
        /// </remarks>
        /// <param name="hostname">Value handed to <c>VerifyUrlIntegrity</c> to obtain the start URL.</param>
        /// <returns>
        /// A task that completes once the queue is empty and the number of entries in
        /// <c>WebsiteMap</c> has been written to the console.
        /// </returns>
        private async Task RunCrawl(string hostname)
        {
            HostUrl = VerifyUrlIntegrity(hostname);

            LinksToCrawl.Enqueue(HostUrl.OriginalString);

            while (LinksToCrawl.Any())
            {
                // Decide the batch size BEFORE dequeuing. Comparing a growing counter against the
                // shrinking queue count (as the loop condition used to) stops roughly halfway and
                // leaves part of the allowed concurrency unused.
                // A non-positive limit is treated as 1 so the outer loop always makes progress.
                var batchSize = Math.Min(Math.Max(1, ConcurrencyLimit), LinksToCrawl.Count);

                var tasks = new List<Task>();

                // WebsiteMap is only updated when a crawl finishes, so it cannot catch the same
                // page being queued twice within one batch; track the keys started here as well.
                var keysInBatch = new HashSet<string>();

                for (var slot = 0; slot < batchSize; slot++)
                {
                    var link = new Uri(LinksToCrawl.Dequeue());
                    var key = link.Host + link.AbsolutePath;

                    // Skip pages that are already recorded or already started in this batch.
                    if (WebsiteMap.ContainsKey(key) || !keysInBatch.Add(key))
                    {
                        continue;
                    }

                    tasks.Add(CrawlPage(link.OriginalString));
                }

                await Task.WhenAll(tasks);
            }

            Console.WriteLine($"Found {WebsiteMap.Count} links");
        }
Esempio n. 2
0
        /// <summary>
        /// Parses a single page and feeds its results back into the crawl state.
        /// </summary>
        /// <remarks>
        /// The parsed result is first passed to <c>HandlePageResultLinks</c>, then stored in
        /// <c>WebsiteMap</c> under the page URL's host plus absolute path (an existing entry
        /// with the same key is left as is), and finally every link in the result is appended
        /// to <c>LinksToCrawl</c>.
        /// </remarks>
        /// <param name="link">Address of the page, passed unchanged to <c>_parser.ParsePage</c>.</param>
        /// <returns>A task that completes when the page has been parsed and its links enqueued.</returns>
        private async Task CrawlPage(string link)
        {
            var parsedPage = await _parser.ParsePage(link);

            HandlePageResultLinks(parsedPage);

            // Same key shape the crawl loop uses for its "already visited" lookup.
            var mapKey = parsedPage.PageUrl.Host + parsedPage.PageUrl.AbsolutePath;
            WebsiteMap.TryAdd(mapKey, parsedPage);

            foreach (var discoveredLink in parsedPage.Links)
            {
                LinksToCrawl.Enqueue(discoveredLink);
            }
        }