/// <summary>
/// Crawls a site starting from <paramref name="hostname"/>: seeds the link queue with the
/// verified host URL, then repeatedly crawls batches of at most <c>ConcurrencyLimit</c>
/// queued links until the queue is empty, and finally prints the number of mapped pages.
/// </summary>
/// <param name="hostname">Host to start from; passed through <c>VerifyUrlIntegrity</c>.</param>
/// <returns>A task that completes when no links remain to crawl.</returns>
private async Task RunCrawl(string hostname)
{
    HostUrl = VerifyUrlIntegrity(hostname);
    LinksToCrawl.Enqueue(HostUrl.OriginalString);

    // Keys (host + path) of every link already handed to CrawlPage during this run.
    // WebsiteMap alone is not enough: it is only updated once a crawl completes, and it is
    // keyed by the parsed page's own URL, which may not match the link that was requested.
    var requestedKeys = new HashSet<string>();

    while (LinksToCrawl.Any())
    {
        // Drain the whole batch BEFORE starting any crawl, so this loop never dequeues
        // while an in-flight crawl is enqueueing newly discovered links.
        var batch = new List<Uri>();
        while (batch.Count < ConcurrencyLimit && LinksToCrawl.Any())
        {
            var link = new Uri(LinksToCrawl.Dequeue());
            var key = link.Host + link.AbsolutePath;

            // Skip pages that are already mapped or already requested.
            if (WebsiteMap.ContainsKey(key) || !requestedKeys.Add(key))
            {
                continue;
            }

            batch.Add(link);
        }

        var tasks = new List<Task>(batch.Count);
        foreach (var link in batch)
        {
            tasks.Add(CrawlPage(link.OriginalString));
        }

        await Task.WhenAll(tasks);
    }

    Console.WriteLine($"Found {WebsiteMap.Count} links");
}
/// <summary>
/// Parses a single page, stores the result in <c>WebsiteMap</c> under the page's
/// host + path, and queues every link found on the page for crawling.
/// </summary>
/// <param name="link">URL of the page to crawl.</param>
/// <returns>A task that completes once the page is recorded and its links are queued.</returns>
private async Task CrawlPage(string link)
{
    var pageResults = await _parser.ParsePage(link);
    HandlePageResultLinks(pageResults);

    var pageKey = pageResults.PageUrl.Host + pageResults.PageUrl.AbsolutePath;
    WebsiteMap.TryAdd(pageKey, pageResults);

    // Several crawls run concurrently and may resume on different threads after the await,
    // so the enqueue is serialized. No await happens inside the lock.
    // NOTE(review): assumes LinksToCrawl is a non-thread-safe Queue<string>; any other code
    // that touches the queue while crawls are in flight needs the same lock — confirm.
    var pendingLinks = LinksToCrawl;
    lock (pendingLinks)
    {
        foreach (var discovered in pageResults.Links)
        {
            pendingLinks.Enqueue(discovered);
        }
    }
}